Effectiveness of Ferritin‐guided Donation Intervals in Blood Donors: results of the Stepped Wedge Cluster‐randomized FIND’EM Trial (questionnaires)

Data

We load and scale the data.

Code
# Load the final survey data (SPSS file) and prepare analysis variables.
data_clean <- read_sav("~/Amber/Data/FINDEM/Final_survey.sav")
data_clean <- data_clean %>%
  dplyr::select(-Einnummer, -Cluster, -Date, -Timepoint, -Meetweek) %>%
  # Recode categorical variables to labelled factors
  mutate(Gender = factor(Gender) %>% fct_recode("Male" = "1", "Female" = "2")) %>%
  mutate(PICA = factor(PICA) %>% fct_recode("Yes" = "1", "No" = "0")) %>%
  mutate(RLS = factor(case_when(RLS == 2 ~ 1, RLS == 1 ~ 0)) %>%
           fct_recode("Yes" = "1", "No" = "0")) %>%
  # Bin months of intervention exposure into 6-month periods
  # (0-5, 6-11, 12-17, 18-23, 24+)
  mutate(Intervention_months_factor = factor(case_when(
    Intervention_months_s < 6 ~ 0,
    Intervention_months_s >= 6 & Intervention_months_s < 12 ~ 1,
    Intervention_months_s >= 12 & Intervention_months_s < 18 ~ 2,
    Intervention_months_s >= 18 & Intervention_months_s < 24 ~ 3,
    Intervention_months_s >= 24 ~ 4
  ))) %>%
  # Clinical ferritin categories used elsewhere in the analysis
  mutate(Fergroup = factor(case_when(
    Ferritin < 15 ~ 1,
    Ferritin >= 15 & Ferritin <= 30 ~ 2,
    Ferritin > 30 ~ 3
  )) %>% fct_recode("Ferritin < 15" = "1", "Ferritin 15-30" = "2", "Ferritin > 30" = "3")) %>%
  # Dichotomize warm glow: a score of 6 or higher counts as "Yes"; NA stays NA.
  # (The original `TRUE & !is.na(Warmglow)` condition is equivalent to
  # `!is.na(Warmglow)` and has been simplified.)
  mutate(Warmglow = as.numeric(Warmglow)) %>%
  mutate(
    Warmglow = as.factor(case_when(
      Warmglow >= 6 ~ 1,
      !is.na(Warmglow) ~ 0
    )) %>% fct_recode("Yes" = "1", "No" = "0"),
    StartDate = substr(StartDate, 1, 10),
    StartDate = as.Date(StartDate, "%Y-%m-%d"),
    # BUG FIX: the original used strict < and > on both sides of "2019-11-01",
    # so a survey started exactly on that date received Meetweek = NA.
    # Use >= for the later wave so every non-missing date is classified.
    Meetweek = case_when(
      StartDate < as.Date("2019-11-01") ~ 2,
      StartDate >= as.Date("2019-11-01") ~ 3
    )
  )

# Center (but do not rescale) the continuous covariates for the regression models
data_scale <- data_clean
data_scale$Age <- scale(data_scale$Age, center = TRUE, scale = FALSE)
data_scale$Weight <- scale(data_scale$Weight, center = TRUE, scale = FALSE)
data_scale$Height <- scale(data_scale$Height, center = TRUE, scale = FALSE)

Descriptives

We present a descriptives table for the outcomes we will analyse, including:

  • PICA
  • Restless legs syndrome (RLS)
  • Cognitive functioning (CFQ)
  • Physical health (SF_ph)
  • Mental health (SF_mh)
  • Warm glow
  • Fatigue (CIS)
Code
 #| label: t1 premenopausal females
# Build the table-1 dataset for measurement week 2: collapse the stepped-wedge
# sub-steps (2.1/2.2 -> 2, 3.1/3.2 -> 3), recode the Hb sentinel value 999 to
# missing, and rescale Hb by dividing by 0.6206 (presumably a mmol/L -> g/dL
# unit conversion — TODO confirm units).
data_t1 <- data_clean %>% mutate(stap2 = as.factor(stap), stap2 = recode_factor(stap2, "2.1"="2","2.2"="2", "3.1"="3", "3.2"="3"), stap2 = factor(stap2,levels = c("1", "2", "3", "4"))) %>% filter(Meetweek ==2) %>% mutate(Hb = replace(Hb, Hb == 999, NA), Hb = Hb/0.6206)

# Descriptives table (mean/SD, median/IQR, frequencies) for premenopausal women,
# split by study step
prefemalest1<-table1::table1(~ Age + Hb + Ferritin + factor(PICA) + factor(RLS) + CFQ_Total + SF_ph + SF_mh + Warmglow + CIS_Totalmean | factor(stap2), data = subset(data_t1, data_t1$Gender=="Female" & data_t1$PostMeno==0), caption = "Premenopausal females", render.continuous=c(.="Mean (SD)", .="Median [Q1, Q3]"), render.categorical=c(.="FREQ (PCT)"))
prefemalest1
Premenopausal females
1
(N=301)
2
(N=310)
3
(N=307)
4
(N=212)
Overall
(N=1130)
Age
Mean (SD) 28.4 (7.61) 28.1 (7.75) 27.3 (7.29) 27.5 (7.75) 27.9 (7.59)
Median [Q1, Q3] 27.0 [22.0, 34.0] 26.0 [22.0, 33.0] 25.0 [21.0, 32.5] 25.0 [21.8, 33.0] 26.0 [22.0, 33.0]
Hb
Mean (SD) 13.5 (0.805) 13.5 (0.775) 13.6 (0.821) 13.5 (0.779) 13.5 (0.797)
Median [Q1, Q3] 13.4 [12.9, 14.0] 13.4 [12.9, 13.9] 13.5 [13.0, 14.2] 13.5 [13.1, 14.0] 13.4 [12.9, 14.0]
Missing 0 (0%) 1 (0.3%) 0 (0%) 1 (0.5%) 2 (0.2%)
Ferritin
Mean (SD) 29.7 (25.0) 29.2 (25.5) 29.3 (35.3) 28.4 (29.4) 29.2 (29.0)
Median [Q1, Q3] 22.7 [12.1, 40.0] 22.6 [11.5, 38.2] 19.9 [10.1, 39.0] 20.8 [11.3, 34.9] 21.6 [11.3, 38.5]
Missing 7 (2.3%) 12 (3.9%) 15 (4.9%) 16 (7.5%) 50 (4.4%)
factor(PICA)
No 283 (94.0) 296 (95.5) 299 (97.4) 197 (92.9) 1075 (95.1)
Yes 5 (1.7) 4 (1.3) 3 (1.0) 10 (4.7) 22 (1.9)
Missing 13 (4.3%) 10 (3.2%) 5 (1.6%) 5 (2.4%) 33 (2.9%)
factor(RLS)
No 237 (78.7) 249 (80.3) 231 (75.2) 164 (77.4) 881 (78.0)
Yes 0 (0) 2 (0.6) 1 (0.3) 3 (1.4) 6 (0.5)
Missing 64 (21.3%) 59 (19.0%) 75 (24.4%) 45 (21.2%) 243 (21.5%)
CFQ_Total
Mean (SD) 30.8 (13.5) 29.3 (11.9) 31.0 (11.9) 32.2 (13.8) 30.7 (12.7)
Median [Q1, Q3] 30.0 [21.5, 37.5] 29.0 [21.0, 38.0] 30.0 [23.0, 38.0] 31.0 [22.3, 41.0] 30.0 [22.0, 38.5]
Missing 14 (4.7%) 10 (3.2%) 5 (1.6%) 6 (2.8%) 35 (3.1%)
SF_ph
Mean (SD) 89.7 (10.2) 90.6 (9.40) 90.3 (9.51) 89.5 (9.54) 90.1 (9.68)
Median [Q1, Q3] 92.5 [86.2, 96.3] 93.1 [87.5, 96.3] 92.5 [87.5, 96.3] 92.4 [85.0, 96.2] 92.5 [86.9, 96.3]
Missing 6 (2.0%) 3 (1.0%) 2 (0.7%) 4 (1.9%) 15 (1.3%)
SF_mh
Mean (SD) 77.4 (13.7) 77.4 (14.3) 77.4 (14.7) 75.8 (15.9) 77.1 (14.6)
Median [Q1, Q3] 82.0 [73.5, 86.5] 82.0 [74.4, 86.3] 82.8 [75.1, 86.5] 80.9 [72.3, 86.3] 82.1 [73.8, 86.5]
Missing 3 (1.0%) 3 (1.0%) 1 (0.3%) 3 (1.4%) 10 (0.9%)
Warmglow
No 119 (39.5) 130 (41.9) 134 (43.6) 109 (51.4) 492 (43.5)
Yes 162 (53.8) 166 (53.5) 163 (53.1) 91 (42.9) 582 (51.5)
Missing 20 (6.6%) 14 (4.5%) 10 (3.3%) 12 (5.7%) 56 (5.0%)
CIS_Totalmean
Mean (SD) 82.9 (6.41) 82.5 (6.05) 82.7 (6.13) 81.9 (5.76) 82.5 (6.12)
Median [Q1, Q3] 84.0 [80.0, 87.0] 83.0 [79.0, 87.0] 84.0 [79.0, 87.0] 83.0 [78.0, 86.0] 83.0 [79.0, 87.0]
Missing 12 (4.0%) 7 (2.3%) 3 (1.0%) 4 (1.9%) 26 (2.3%)
Code
# Same descriptives table as above, restricted to postmenopausal women
postfemalest1<-table1::table1(~ Age + Hb + Ferritin + factor(PICA) + factor(RLS) + CFQ_Total + SF_ph + SF_mh + Warmglow + CIS_Totalmean | factor(stap2), data = subset(data_t1, data_t1$Gender=="Female" & data_t1$PostMeno==1), caption = "Postmenopausal females", render.continuous=c(.="Mean (SD)", .="Median [Q1, Q3]"), render.categorical=c(.="FREQ (PCT)"))
postfemalest1
Postmenopausal females
1
(N=201)
2
(N=195)
3
(N=227)
4
(N=142)
Overall
(N=765)
Age
Mean (SD) 55.3 (6.90) 56.0 (6.80) 55.3 (6.42) 55.8 (7.05) 55.6 (6.76)
Median [Q1, Q3] 54.0 [50.0, 61.0] 56.0 [50.0, 61.0] 55.0 [50.0, 60.0] 55.0 [50.0, 61.8] 55.0 [50.0, 61.0]
Hb
Mean (SD) 13.7 (0.856) 13.7 (0.825) 13.7 (0.848) 13.7 (0.822) 13.7 (0.838)
Median [Q1, Q3] 13.5 [13.1, 14.2] 13.7 [13.1, 14.2] 13.5 [13.1, 14.3] 13.5 [13.1, 14.0] 13.5 [13.1, 14.2]
Ferritin
Mean (SD) 34.9 (28.6) 32.2 (24.6) 29.9 (26.7) 31.1 (25.3) 32.0 (26.5)
Median [Q1, Q3] 29.3 [16.3, 41.5] 25.6 [15.0, 44.7] 22.4 [10.4, 39.2] 26.3 [15.0, 41.5] 26.1 [14.1, 41.5]
Missing 5 (2.5%) 5 (2.6%) 8 (3.5%) 3 (2.1%) 21 (2.7%)
factor(PICA)
No 196 (97.5) 192 (98.5) 218 (96.0) 139 (97.9) 745 (97.4)
Yes 4 (2.0) 1 (0.5) 4 (1.8) 2 (1.4) 11 (1.4)
Missing 1 (0.5%) 2 (1.0%) 5 (2.2%) 1 (0.7%) 9 (1.2%)
factor(RLS)
No 158 (78.6) 147 (75.4) 166 (73.1) 105 (73.9) 576 (75.3)
Yes 0 (0) 4 (2.1) 7 (3.1) 3 (2.1) 14 (1.8)
Missing 43 (21.4%) 44 (22.6%) 54 (23.8%) 34 (23.9%) 175 (22.9%)
CFQ_Total
Mean (SD) 26.4 (12.2) 25.3 (10.9) 25.0 (9.84) 26.0 (12.6) 25.6 (11.3)
Median [Q1, Q3] 26.0 [18.0, 34.0] 24.0 [17.0, 32.0] 25.0 [18.0, 32.0] 25.0 [16.0, 34.0] 25.0 [17.0, 33.0]
Missing 1 (0.5%) 2 (1.0%) 5 (2.2%) 1 (0.7%) 9 (1.2%)
SF_ph
Mean (SD) 87.4 (11.7) 87.8 (12.6) 88.0 (12.1) 88.5 (9.62) 87.9 (11.7)
Median [Q1, Q3] 89.9 [83.7, 96.2] 92.4 [84.9, 96.3] 91.3 [84.4, 96.3] 90.0 [85.6, 94.9] 91.2 [84.9, 95.0]
Missing 1 (0.5%) 2 (1.0%) 4 (1.8%) 1 (0.7%) 8 (1.0%)
SF_mh
Mean (SD) 81.4 (13.0) 81.6 (11.3) 81.2 (12.2) 81.3 (11.9) 81.4 (12.1)
Median [Q1, Q3] 85.4 [78.4, 88.8] 84.3 [79.0, 88.8] 84.6 [79.0, 87.8] 84.0 [80.3, 88.1] 84.7 [78.9, 88.5]
Missing 1 (0.5%) 1 (0.5%) 3 (1.3%) 0 (0%) 5 (0.7%)
Warmglow
No 76 (37.8) 57 (29.2) 70 (30.8) 48 (33.8) 251 (32.8)
Yes 118 (58.7) 129 (66.2) 148 (65.2) 92 (64.8) 487 (63.7)
Missing 7 (3.5%) 9 (4.6%) 9 (4.0%) 2 (1.4%) 27 (3.5%)
CIS_Totalmean
Mean (SD) 86.1 (6.12) 85.8 (7.03) 86.5 (6.44) 85.1 (5.72) 85.9 (6.39)
Median [Q1, Q3] 86.0 [84.0, 90.0] 86.0 [82.0, 89.0] 87.0 [83.2, 89.0] 85.1 [82.0, 89.0] 86.0 [83.0, 89.0]
Missing 1 (0.5%) 2 (1.0%) 5 (2.2%) 0 (0%) 8 (1.0%)
Code
# Same descriptives table as above, restricted to male donors
malest1<-table1::table1(~ Age + Hb + Ferritin + factor(PICA) + factor(RLS) + CFQ_Total + SF_ph + SF_mh + Warmglow + CIS_Totalmean | factor(stap2), data = subset(data_t1, data_t1$Gender=="Male"), caption = "Males", render.continuous=c(.="Mean (SD)", .="Median [Q1, Q3]"), render.categorical=c(.="FREQ (PCT)"))
malest1
Males
1
(N=345)
2
(N=411)
3
(N=501)
4
(N=335)
Overall
(N=1592)
Age
Mean (SD) 46.0 (15.2) 45.6 (15.2) 45.4 (15.2) 47.8 (15.4) 46.1 (15.2)
Median [Q1, Q3] 48.0 [32.0, 59.0] 48.0 [31.5, 59.0] 48.0 [32.0, 58.0] 51.0 [34.0, 61.0] 49.0 [32.0, 59.0]
Hb
Mean (SD) 15.0 (1.00) 15.0 (0.961) 14.9 (0.956) 14.8 (0.932) 14.9 (0.966)
Median [Q1, Q3] 15.0 [14.2, 15.6] 14.8 [14.3, 15.5] 14.7 [14.2, 15.6] 14.8 [14.0, 15.5] 14.8 [14.2, 15.6]
Missing 0 (0%) 2 (0.5%) 1 (0.2%) 0 (0%) 3 (0.2%)
Ferritin
Mean (SD) 52.9 (59.7) 43.1 (42.9) 40.7 (47.5) 37.1 (41.8) 43.2 (48.5)
Median [Q1, Q3] 35.3 [21.2, 61.7] 29.8 [15.1, 53.9] 26.7 [12.9, 46.2] 23.0 [12.2, 43.4] 28.4 [14.7, 50.7]
Missing 4 (1.2%) 9 (2.2%) 10 (2.0%) 13 (3.9%) 36 (2.3%)
factor(PICA)
No 333 (96.5) 393 (95.6) 471 (94.0) 321 (95.8) 1518 (95.4)
Yes 9 (2.6) 8 (1.9) 15 (3.0) 4 (1.2) 36 (2.3)
Missing 3 (0.9%) 10 (2.4%) 15 (3.0%) 10 (3.0%) 38 (2.4%)
factor(RLS)
No 290 (84.1) 341 (83.0) 403 (80.4) 278 (83.0) 1312 (82.4)
Yes 2 (0.6) 0 (0) 5 (1.0) 2 (0.6) 9 (0.6)
Missing 53 (15.4%) 70 (17.0%) 93 (18.6%) 55 (16.4%) 271 (17.0%)
CFQ_Total
Mean (SD) 25.2 (11.6) 23.9 (10.9) 24.5 (11.3) 23.4 (11.1) 24.3 (11.2)
Median [Q1, Q3] 25.0 [17.0, 32.0] 23.0 [17.0, 30.0] 24.0 [16.0, 31.0] 23.0 [17.0, 29.0] 24.0 [17.0, 31.0]
Missing 6 (1.7%) 10 (2.4%) 12 (2.4%) 13 (3.9%) 41 (2.6%)
SF_ph
Mean (SD) 90.6 (8.64) 91.3 (9.32) 91.7 (7.92) 90.6 (9.81) 91.1 (8.87)
Median [Q1, Q3] 92.5 [87.4, 96.3] 93.8 [88.8, 97.5] 93.8 [89.9, 96.3] 92.5 [87.5, 96.3] 93.7 [88.7, 96.3]
Missing 3 (0.9%) 6 (1.5%) 7 (1.4%) 5 (1.5%) 21 (1.3%)
SF_mh
Mean (SD) 82.1 (11.9) 83.3 (11.1) 82.2 (12.1) 83.5 (10.9) 82.7 (11.6)
Median [Q1, Q3] 85.4 [79.8, 89.0] 86.4 [80.9, 89.8] 85.5 [80.4, 88.8] 86.5 [81.0, 89.8] 85.8 [80.5, 89.0]
Missing 2 (0.6%) 5 (1.2%) 4 (0.8%) 5 (1.5%) 16 (1.0%)
Warmglow
No 119 (34.5) 123 (29.9) 164 (32.7) 110 (32.8) 516 (32.4)
Yes 211 (61.2) 272 (66.2) 323 (64.5) 207 (61.8) 1013 (63.6)
Missing 15 (4.3%) 16 (3.9%) 14 (2.8%) 18 (5.4%) 63 (4.0%)
CIS_Totalmean
Mean (SD) 85.1 (5.95) 86.1 (6.24) 85.3 (5.97) 85.8 (5.54) 85.6 (5.96)
Median [Q1, Q3] 85.3 [82.0, 88.0] 86.0 [82.1, 89.0] 86.0 [82.0, 89.0] 86.0 [83.0, 89.0] 86.0 [82.0, 89.0]
Missing 3 (0.9%) 9 (2.2%) 12 (2.4%) 9 (2.7%) 33 (2.1%)

Analysis

PICA

Pica is diagnosed when a donor answers yes to the question: do you crave and regularly eat non-food items such as ice, clay, mud, sand, raw pasta, chalk or charcoal? (translated)

Models

Code
# Logistic regressions of PICA on centered Age, Weight, Height and categorical
# intervention-exposure duration, fitted separately for men, premenopausal
# women, and postmenopausal women.
PICA_fitM <- glm(formula = PICA ~ Age + Weight + Height + Intervention_months_factor, data = data_scale, subset = data_scale$Gender == "Male" ,family = binomial)
PICA_fit_preF <- glm(formula = PICA ~ Age + Weight + Height + Intervention_months_factor, data = data_scale, subset = data_scale$Gender == "Female" & data_scale$PostMeno == 0 ,family = binomial)
PICA_fit_postF <- glm(formula = PICA ~ Age + Weight + Height + Intervention_months_factor, data = data_scale, subset = data_scale$Gender == "Female" & data_scale$PostMeno == 1 ,family = binomial)

# Human-readable labels for the intervention-duration dummies
# (reference category: 0-5 months)
pl <- c(
  `(Intercept)` = "Intercept",
  Intervention_months_factor0 = "0-5 months",
  Intervention_months_factor1 = "6-11 months",
  Intervention_months_factor2 = "12-17 months",
  Intervention_months_factor3 = "18-23 months",
  Intervention_months_factor4 = "24+ months"
)

# Side-by-side odds-ratio table for the three models
tab_model(PICA_fit_preF, PICA_fit_postF, PICA_fitM, pred.labels = pl, dv.labels = c("Pre-menopausal women", "Post-menopausal women", "Men"), show.reflvl = T, title = "PICA")
PICA
  Pre-menopausal women Post-menopausal women Men
Predictors Odds Ratios CI p Odds Ratios CI p Odds Ratios CI p
Intercept 0.01 0.00 – 0.02 <0.001 0.02 0.01 – 0.05 <0.001 0.03 0.02 – 0.04 <0.001
Age 0.93 0.88 – 0.97 0.001 0.99 0.93 – 1.05 0.694 1.00 0.98 – 1.01 0.645
Weight 1.03 1.00 – 1.05 0.016 1.03 0.99 – 1.06 0.128 1.02 1.01 – 1.04 0.007
Height 0.98 0.93 – 1.02 0.284 0.98 0.92 – 1.05 0.546 0.98 0.95 – 1.01 0.219
6-11 months 1.06 0.48 – 2.18 0.885 0.53 0.08 – 2.05 0.421 0.68 0.32 – 1.31 0.280
12-17 months 0.75 0.29 – 1.67 0.502 1.66 0.60 – 4.38 0.308 1.00 0.55 – 1.72 0.993
18-23 months 1.28 0.50 – 2.88 0.571 1.62 0.25 – 6.31 0.538 1.11 0.48 – 2.25 0.794
24+ months 0.80 0.29 – 1.86 0.629 1.93 0.52 – 5.87 0.275 0.91 0.47 – 1.65 0.766
Observations 2345 1465 3545
R2 Tjur 0.009 0.004 0.003

Model assumptions

Premenopausal females:

Code
plot(PICA_fit_preF, which = 4, id.n = 3)

Code
# Inspect the observations with the largest Cook's distance.
# augment() attaches per-observation diagnostics (.cooksd, .std.resid, ...)
model.data <- augment(PICA_fit_preF) %>% 
  mutate(index = 1:n())
# Show the 3 observations with the largest Cook's distance
model.data %>% top_n(3, .cooksd) 
# A tibble: 3 × 13
  .rownames PICA  Age[,1] Weight[,1] Height[,1] Intervention_months_fa…¹ .fitted
  <chr>     <fct>   <dbl>      <dbl>      <dbl> <fct>                      <dbl>
1 3788      Yes     -6.02    -11.2       -20.8  4                          -4.38
2 4453      Yes      1.98     -0.189     -13.8  4                          -4.88
3 7722      Yes    -21.0      36.8        -1.76 3                          -1.98
# ℹ abbreviated name: ¹​Intervention_months_factor
# ℹ 6 more variables: .resid <dbl>, .hat <dbl>, .sigma <dbl>, .cooksd <dbl>,
#   .std.resid <dbl>, index <int>
Code
#Plot standardized residuals: 
ggplot(model.data, aes(index, .std.resid)) + 
  geom_point(aes(color = PICA), alpha = .5) +
  theme_bw()

Code
# Flag potentially influential points: |standardized residual| > 3
model.data %>% 
  filter(abs(.std.resid) > 3) # 4 influential data points
# A tibble: 4 × 13
  .rownames PICA   Age[,1] Weight[,1] Height[,1] Intervention_months_f…¹ .fitted
  <chr>     <fct>    <dbl>      <dbl>      <dbl> <fct>                     <dbl>
1 3642      Yes   -11.0       -23.2        -8.76 4                         -4.61
2 4028      Yes    -0.0154    -10.2        -3.76 3                         -4.76
3 4453      Yes     1.98       -0.189     -13.8  4                         -4.88
4 7651      Yes     0.985     -20.2       -14.8  0                         -5.07
# ℹ abbreviated name: ¹​Intervention_months_factor
# ℹ 6 more variables: .resid <dbl>, .hat <dbl>, .sigma <dbl>, .cooksd <dbl>,
#   .std.resid <dbl>, index <int>
Code
# Plot Cook's distance with a cutoff of 3 * (k + 1) / n (Stevens, 2002),
# where k + 1 = 8 model parameters (intercept, Age, Weight, Height and the
# four intervention-duration dummies).
cooksd <- cooks.distance(PICA_fit_preF)
# BUG FIX: n must be the number of observations actually used in this fit;
# the original nrow(data_scale) also counted men and postmenopausal women,
# making the cutoff line too low.
n <- length(cooksd)
plot(cooksd, main = "Cooks Distance for Influential Obs")
abline(h = (8 / n) * 3, lty = 2, col = "steelblue") # add cutoff line

Code
# influence plot
car::influencePlot(PICA_fit_preF,col="red",id.n=3)

        StudRes         Hat       CookD
3788  3.0187720 0.004100397 0.041247398
4453  3.1823811 0.002745063 0.045224283
5873 -0.6309422 0.073630863 0.002253238
7651  3.2299332 0.001706341 0.034185759
7722  2.1271122 0.039465735 0.038884139
Code
car::influenceIndexPlot(PICA_fit_preF, id.n=3)

Code
# Refit without the three most influential observations and compare coefficients.
# NOTE(review): negative indices drop rows by position, while the diagnostics
# above label observations by row name — confirm these coincide here.
data_scale2 <- data_scale[c(-4453, -3788, -7722), ]
PICA_fit_preF_upd1 <- update(PICA_fit_preF, data = data_scale2, subset= data_scale2$Gender == "Female" & data_scale2$PostMeno == 0)
car::compareCoefs(PICA_fit_preF,PICA_fit_preF_upd1) # no improvement
Calls:
1: glm(formula = PICA ~ Age + Weight + Height + Intervention_months_factor, 
  family = binomial, data = data_scale, subset = data_scale$Gender == 
  "Female" & data_scale$PostMeno == 0)
2: glm(formula = PICA ~ Age + Weight + Height + Intervention_months_factor, 
  family = binomial, data = data_scale2, subset = data_scale2$Gender == 
  "Female" & data_scale2$PostMeno == 0)

                            Model 1 Model 2
(Intercept)                  -4.838  -4.978
SE                            0.449   0.480
                                           
Age                         -0.0762 -0.0873
SE                           0.0226  0.0243
                                           
Weight                       0.0261  0.0210
SE                           0.0109  0.0117
                                           
Height                      -0.0249 -0.0110
SE                           0.0232  0.0238
                                           
Intervention_months_factor1  0.0552  0.0624
SE                           0.3830  0.3830
                                           
Intervention_months_factor2  -0.293  -0.284
SE                            0.435   0.436
                                           
Intervention_months_factor3  0.2481  0.0957
SE                           0.4382  0.4646
                                           
Intervention_months_factor4  -0.224  -0.628
SE                            0.463   0.546
                                           
Code
summary(PICA_fit_preF)

Call:
glm(formula = PICA ~ Age + Weight + Height + Intervention_months_factor, 
    family = binomial, data = data_scale, subset = data_scale$Gender == 
        "Female" & data_scale$PostMeno == 0)

Deviance Residuals: 
    Min       1Q   Median       3Q      Max  
-0.6176  -0.2450  -0.2033  -0.1570   3.1874  

Coefficients:
                            Estimate Std. Error z value Pr(>|z|)    
(Intercept)                 -4.83824    0.44894 -10.777  < 2e-16 ***
Age                         -0.07616    0.02259  -3.372 0.000746 ***
Weight                       0.02613    0.01088   2.402 0.016300 *  
Height                      -0.02488    0.02323  -1.071 0.284121    
Intervention_months_factor1  0.05523    0.38299   0.144 0.885335    
Intervention_months_factor2 -0.29262    0.43547  -0.672 0.501609    
Intervention_months_factor3  0.24805    0.43821   0.566 0.571356    
Intervention_months_factor4 -0.22381    0.46300  -0.483 0.628810    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for binomial family taken to be 1)

    Null deviance: 514.02  on 2344  degrees of freedom
Residual deviance: 496.47  on 2337  degrees of freedom
  (685 observations deleted due to missingness)
AIC: 512.47

Number of Fisher Scoring iterations: 7
Code
summary(PICA_fit_preF_upd1)

Call:
glm(formula = PICA ~ Age + Weight + Height + Intervention_months_factor, 
    family = binomial, data = data_scale2, subset = data_scale2$Gender == 
        "Female" & data_scale2$PostMeno == 0)

Deviance Residuals: 
    Min       1Q   Median       3Q      Max  
-0.5675  -0.2446  -0.1964  -0.1426   3.2650  

Coefficients:
                            Estimate Std. Error z value Pr(>|z|)    
(Intercept)                 -4.97806    0.48015 -10.368  < 2e-16 ***
Age                         -0.08729    0.02435  -3.586 0.000336 ***
Weight                       0.02095    0.01169   1.792 0.073148 .  
Height                      -0.01095    0.02381  -0.460 0.645551    
Intervention_months_factor1  0.06238    0.38298   0.163 0.870616    
Intervention_months_factor2 -0.28433    0.43556  -0.653 0.513890    
Intervention_months_factor3  0.09570    0.46465   0.206 0.836825    
Intervention_months_factor4 -0.62845    0.54566  -1.152 0.249435    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for binomial family taken to be 1)

    Null deviance: 491.23  on 2341  degrees of freedom
Residual deviance: 472.08  on 2334  degrees of freedom
  (685 observations deleted due to missingness)
AIC: 488.08

Number of Fisher Scoring iterations: 7
Code
# Multicollinearity check via generalized VIF (GVIF for the factor term)
car::vif(PICA_fit_preF) # no VIF value that exceeds 5 or 10 
                               GVIF Df GVIF^(1/(2*Df))
Age                        1.056045  1        1.027641
Weight                     1.163751  1        1.078773
Height                     1.111582  1        1.054316
Intervention_months_factor 1.006976  4        1.000869

Postmenopausal females:

Code
plot(PICA_fit_postF, which = 4, id.n = 3)

Code
#Inspect the largest:
model.data <- augment(PICA_fit_postF) %>% 
  mutate(index = 1:n())
model.data %>% top_n(3, .cooksd) 
# A tibble: 3 × 13
  .rownames PICA  Age[,1] Weight[,1] Height[,1] Intervention_months_fa…¹ .fitted
  <chr>     <fct>   <dbl>      <dbl>      <dbl> <fct>                      <dbl>
1 1968      Yes      22.0       6.81      -6.76 1                          -4.69
2 3889      Yes      17.0     -19.2      -18.8  3                          -3.92
3 7354      Yes      25.0      -1.19      -3.76 1                          -4.98
# ℹ abbreviated name: ¹​Intervention_months_factor
# ℹ 6 more variables: .resid <dbl>, .hat <dbl>, .sigma <dbl>, .cooksd <dbl>,
#   .std.resid <dbl>, index <int>
Code
#Plot standardized residuals: 
ggplot(model.data, aes(index, .std.resid)) + 
  geom_point(aes(color = PICA), alpha = .5) +
  theme_bw()

Code
# Flag potentially influential points: |standardized residual| > 3
# (the output below shows 3 such observations, not 4 as the original
# copy-pasted comment claimed)
model.data %>% 
  filter(abs(.std.resid) > 3) # 3 influential data points
# A tibble: 3 × 13
  .rownames PICA  Age[,1] Weight[,1] Height[,1] Intervention_months_fa…¹ .fitted
  <chr>     <fct>   <dbl>      <dbl>      <dbl> <fct>                      <dbl>
1 1968      Yes      22.0       6.81      -6.76 1                          -4.69
2 6560      Yes      27.0      -9.19      -7.76 0                          -4.50
3 7354      Yes      25.0      -1.19      -3.76 1                          -4.98
# ℹ abbreviated name: ¹​Intervention_months_factor
# ℹ 6 more variables: .resid <dbl>, .hat <dbl>, .sigma <dbl>, .cooksd <dbl>,
#   .std.resid <dbl>, index <int>
Code
# Plot Cook's distance with a cutoff of 3 * (k + 1) / n (Stevens, 2002),
# where k + 1 = 8 model parameters.
cooksd <- cooks.distance(PICA_fit_postF)
# BUG FIX: n must be the sample size of this model's fit; the original
# nrow(data_scale) counted the whole dataset (all donor groups), making
# the cutoff line too low.
n <- length(cooksd)
plot(cooksd, main = "Cooks Distance for Influential Obs")
abline(h = (8 / n) * 3, lty = 2, col = "steelblue") # add cutoff line

Code
# influence plot
car::influencePlot(PICA_fit_postF,col="red",id.n=3)

       StudRes         Hat        CookD
1968  3.159646 0.005396830 0.0740710103
2897 -0.420213 0.060323738 0.0007615856
3889  2.916690 0.012066170 0.0782557928
4404 -0.259491 0.090758385 0.0004468314
7354  3.262409 0.004500247 0.0829501987
Code
car::influenceIndexPlot(PICA_fit_postF, id.n=3)

Code
# Refit the postmenopausal model without the three most influential
# observations and compare coefficients.
data_scale2 <- data_scale[c(-1968, -3889, -7354), ]
# BUG FIX: the subset must select postmenopausal women (PostMeno == 1) to
# match PICA_fit_postF. The original used PostMeno == 0, which silently refit
# the PREmenopausal model (compare the printed Model 2 coefficients with the
# premenopausal fit above), so the "no improvement" conclusion was based on
# comparing two unrelated models.
PICA_fit_postF_upd1 <- update(PICA_fit_postF, data = data_scale2,
                              subset = data_scale2$Gender == "Female" & data_scale2$PostMeno == 1)
car::compareCoefs(PICA_fit_postF, PICA_fit_postF_upd1)
Calls:
1: glm(formula = PICA ~ Age + Weight + Height + Intervention_months_factor, 
  family = binomial, data = data_scale, subset = data_scale$Gender == 
  "Female" & data_scale$PostMeno == 1)
2: glm(formula = PICA ~ Age + Weight + Height + Intervention_months_factor, 
  family = binomial, data = data_scale2, subset = data_scale2$Gender == 
  "Female" & data_scale2$PostMeno == 0)

                            Model 1 Model 2
(Intercept)                  -4.099  -4.838
SE                            0.539   0.449
                                           
Age                         -0.0121 -0.0762
SE                           0.0308  0.0226
                                           
Weight                       0.0250  0.0261
SE                           0.0164  0.0109
                                           
Height                      -0.0201 -0.0249
SE                           0.0333  0.0232
                                           
Intervention_months_factor1 -0.6280  0.0552
SE                           0.7799  0.3830
                                           
Intervention_months_factor2   0.509  -0.293
SE                            0.499   0.435
                                           
Intervention_months_factor3   0.483   0.248
SE                            0.785   0.438
                                           
Intervention_months_factor4   0.655  -0.224
SE                            0.600   0.463
                                           
Code
summary(PICA_fit_postF)

Call:
glm(formula = PICA ~ Age + Weight + Height + Intervention_months_factor, 
    family = binomial, data = data_scale, subset = data_scale$Gender == 
        "Female" & data_scale$PostMeno == 1)

Deviance Residuals: 
    Min       1Q   Median       3Q      Max  
-0.4133  -0.2063  -0.1734  -0.1483   3.1595  

Coefficients:
                            Estimate Std. Error z value Pr(>|z|)    
(Intercept)                 -4.09907    0.53920  -7.602 2.91e-14 ***
Age                         -0.01214    0.03081  -0.394    0.694    
Weight                       0.02500    0.01644   1.521    0.128    
Height                      -0.02009    0.03328  -0.604    0.546    
Intervention_months_factor1 -0.62798    0.77987  -0.805    0.421    
Intervention_months_factor2  0.50858    0.49885   1.020    0.308    
Intervention_months_factor3  0.48344    0.78492   0.616    0.538    
Intervention_months_factor4  0.65540    0.60015   1.092    0.275    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for binomial family taken to be 1)

    Null deviance: 253.11  on 1464  degrees of freedom
Residual deviance: 247.37  on 1457  degrees of freedom
  (608 observations deleted due to missingness)
AIC: 263.37

Number of Fisher Scoring iterations: 7
Code
summary(PICA_fit_postF_upd1)

Call:
glm(formula = PICA ~ Age + Weight + Height + Intervention_months_factor, 
    family = binomial, data = data_scale2, subset = data_scale2$Gender == 
        "Female" & data_scale2$PostMeno == 0)

Deviance Residuals: 
    Min       1Q   Median       3Q      Max  
-0.6176  -0.2450  -0.2033  -0.1570   3.1874  

Coefficients:
                            Estimate Std. Error z value Pr(>|z|)    
(Intercept)                 -4.83824    0.44894 -10.777  < 2e-16 ***
Age                         -0.07616    0.02259  -3.372 0.000746 ***
Weight                       0.02613    0.01088   2.402 0.016300 *  
Height                      -0.02488    0.02323  -1.071 0.284121    
Intervention_months_factor1  0.05523    0.38299   0.144 0.885335    
Intervention_months_factor2 -0.29262    0.43547  -0.672 0.501609    
Intervention_months_factor3  0.24805    0.43821   0.566 0.571356    
Intervention_months_factor4 -0.22381    0.46300  -0.483 0.628810    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for binomial family taken to be 1)

    Null deviance: 514.02  on 2344  degrees of freedom
Residual deviance: 496.47  on 2337  degrees of freedom
  (685 observations deleted due to missingness)
AIC: 512.47

Number of Fisher Scoring iterations: 7
Code
#multicollinearity
car::vif(PICA_fit_postF) # no VIF value that exceeds 5 or 10 
                               GVIF Df GVIF^(1/(2*Df))
Age                        1.040166  1        1.019885
Weight                     1.158062  1        1.076133
Height                     1.185532  1        1.088821
Intervention_months_factor 1.011547  4        1.001436

Males:

Code
plot(PICA_fitM, which = 4, id.n = 3)

Code
#Inspect the largest:
model.data <- augment(PICA_fitM) %>% 
  mutate(index = 1:n())
model.data %>% top_n(3, .cooksd) 
# A tibble: 3 × 13
  .rownames PICA  Age[,1] Weight[,1] Height[,1] Intervention_months_fa…¹ .fitted
  <chr>     <fct>   <dbl>      <dbl>      <dbl> <fct>                      <dbl>
1 897       Yes     -9.02      66.8       23.2  2                          -2.54
2 6725      Yes    -11.0       51.8       -2.76 4                          -2.46
3 8024      Yes     24.0        8.81     -24.8  0                          -3.00
# ℹ abbreviated name: ¹​Intervention_months_factor
# ℹ 6 more variables: .resid <dbl>, .hat <dbl>, .sigma <dbl>, .cooksd <dbl>,
#   .std.resid <dbl>, index <int>
Code
#Plot standardized residuals: 
ggplot(model.data, aes(index, .std.resid)) + 
  geom_point(aes(color = PICA), alpha = .5) +
  theme_bw()

Code
# Flag potentially influential points: |standardized residual| > 3
# (the output below shows NO such observations for the male model; the
# original copy-pasted comment claiming 4 was incorrect)
model.data %>% 
  filter(abs(.std.resid) > 3) # 0 influential data points
# A tibble: 0 × 13
# ℹ 13 variables: .rownames <chr>, PICA <fct>, Age <dbl[,1]>, Weight <dbl[,1]>,
#   Height <dbl[,1]>, Intervention_months_factor <fct>, .fitted <dbl>,
#   .resid <dbl>, .hat <dbl>, .sigma <dbl>, .cooksd <dbl>, .std.resid <dbl>,
#   index <int>
Code
# Plot Cook's distance with a cutoff of 3 * (k + 1) / n (Stevens, 2002),
# where k + 1 = 8 model parameters.
cooksd <- cooks.distance(PICA_fitM)
# BUG FIX: n must be the sample size of this model's fit; the original
# nrow(data_scale) counted the whole dataset (including women), making the
# cutoff line too low.
n <- length(cooksd)
plot(cooksd, main = "Cooks Distance for Influential Obs")
abline(h = (8 / n) * 3, lty = 2, col = "steelblue") # add cutoff line

Code
# influence plot
car::influencePlot(PICA_fitM,col="red",id.n=3)

        StudRes         Hat        CookD
1294 -0.3973451 0.068791951 0.0007832811
5248 -0.4809799 0.032709351 0.0005258886
6621  2.9351986 0.002152348 0.0183783288
6725  2.3060317 0.020059249 0.0304778698
7465  2.9609268 0.002124400 0.0195002389
8024  2.5155578 0.011478062 0.0294584111
Code
car::influenceIndexPlot(PICA_fitM, id.n=3)

Code
# Refit the male model without three influential observations and compare
# coefficients.
data_scale2 <- data_scale[c(-6621, -1294, -7465), ]
# BUG FIX: the subset must select men to match PICA_fitM. The original used
# Gender == "Female" & PostMeno == 0, which silently refit the PREmenopausal
# model instead (the printed Model 2 coefficients match the premenopausal fit
# above), so the "no improvement" comparison was between two unrelated models.
PICA_fitM_upd1 <- update(PICA_fitM, data = data_scale2,
                         subset = data_scale2$Gender == "Male")
car::compareCoefs(PICA_fitM, PICA_fitM_upd1)
Calls:
1: glm(formula = PICA ~ Age + Weight + Height + Intervention_months_factor, 
  family = binomial, data = data_scale, subset = data_scale$Gender == "Male")
2: glm(formula = PICA ~ Age + Weight + Height + Intervention_months_factor, 
  family = binomial, data = data_scale2, subset = data_scale2$Gender == 
  "Female" & data_scale2$PostMeno == 0)

                             Model 1  Model 2
(Intercept)                   -3.590   -4.838
SE                             0.183    0.449
                                             
Age                         -0.00332 -0.07616
SE                           0.00721  0.02259
                                             
Weight                       0.02196  0.02613
SE                           0.00816  0.01088
                                             
Height                       -0.0193  -0.0249
SE                            0.0157   0.0232
                                             
Intervention_months_factor1  -0.3809   0.0552
SE                            0.3526   0.3830
                                             
Intervention_months_factor2 -0.00243 -0.29262
SE                           0.28825  0.43547
                                             
Intervention_months_factor3    0.102    0.248
SE                             0.389    0.438
                                             
Intervention_months_factor4  -0.0948  -0.2238
SE                            0.3190   0.4630
                                             
Code
summary(PICA_fitM)

Call:
glm(formula = PICA ~ Age + Weight + Height + Intervention_months_factor, 
    family = binomial, data = data_scale, subset = data_scale$Gender == 
        "Male")

Deviance Residuals: 
    Min       1Q   Median       3Q      Max  
-0.4767  -0.2463  -0.2259  -0.2076   2.9345  

Coefficients:
                             Estimate Std. Error z value Pr(>|z|)    
(Intercept)                 -3.589998   0.182792 -19.640  < 2e-16 ***
Age                         -0.003321   0.007205  -0.461  0.64490    
Weight                       0.021961   0.008161   2.691  0.00712 ** 
Height                      -0.019276   0.015696  -1.228  0.21943    
Intervention_months_factor1 -0.380917   0.352632  -1.080  0.28005    
Intervention_months_factor2 -0.002432   0.288246  -0.008  0.99327    
Intervention_months_factor3  0.101603   0.388965   0.261  0.79393    
Intervention_months_factor4 -0.094791   0.319002  -0.297  0.76635    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for binomial family taken to be 1)

    Null deviance: 875.12  on 3544  degrees of freedom
Residual deviance: 866.95  on 3537  degrees of freedom
  (671 observations deleted due to missingness)
AIC: 882.95

Number of Fisher Scoring iterations: 6
Code
summary(PICA_fitM_upd1)

Call:
glm(formula = PICA ~ Age + Weight + Height + Intervention_months_factor, 
    family = binomial, data = data_scale2, subset = data_scale2$Gender == 
        "Female" & data_scale2$PostMeno == 0)

Deviance Residuals: 
    Min       1Q   Median       3Q      Max  
-0.6176  -0.2450  -0.2033  -0.1570   3.1874  

Coefficients:
                            Estimate Std. Error z value Pr(>|z|)    
(Intercept)                 -4.83824    0.44894 -10.777  < 2e-16 ***
Age                         -0.07616    0.02259  -3.372 0.000746 ***
Weight                       0.02613    0.01088   2.402 0.016300 *  
Height                      -0.02488    0.02323  -1.071 0.284121    
Intervention_months_factor1  0.05523    0.38299   0.144 0.885335    
Intervention_months_factor2 -0.29262    0.43547  -0.672 0.501609    
Intervention_months_factor3  0.24805    0.43821   0.566 0.571356    
Intervention_months_factor4 -0.22381    0.46300  -0.483 0.628810    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for binomial family taken to be 1)

    Null deviance: 514.02  on 2344  degrees of freedom
Residual deviance: 496.47  on 2337  degrees of freedom
  (685 observations deleted due to missingness)
AIC: 512.47

Number of Fisher Scoring iterations: 7
Code
#multicollinearity
car::vif(PICA_fitM) # no VIF value that exceeds 5 or 10 
                               GVIF Df GVIF^(1/(2*Df))
Age                        1.064441  1        1.031718
Weight                     1.215704  1        1.102590
Height                     1.201809  1        1.096271
Intervention_months_factor 1.011403  4        1.001418

Restless legs syndrome

Restless legs syndrome (RLS) diagnosis was based on Allen, Burchell, et al. (2009), Validation of the self-completed Cambridge-Hopkins questionnaire (CH-RLSq) for ascertainment of restless legs syndrome (RLS) in a population survey.

Predict by ferritin

Code
# Compute the analysis of variance
# NOTE: RLS is a Yes/No factor at this point; as.numeric() yields the
# underlying level codes (1 and 2), so the group means and the ANOVA below
# operate on a 1/2-coded numeric outcome. RLS is recoded back to a factor
# further down before the logistic models are fitted.
data_scale$RLS <- as.numeric(data_scale$RLS)
means <- round(aggregate(Ferritin ~  RLS, data_scale, mean), 1)
res.aov <- aov(RLS ~ Fergroup, data = data_scale)
summary(res.aov)
              Df Sum Sq Mean Sq F value Pr(>F)
Fergroup       2    0.1 0.05452   0.756   0.47
Residuals   6404  461.7 0.07210               
1748 observations deleted due to missingness
Code
#fit model
# Linear probability model of the 1/2-coded RLS outcome on the three
# ferritin cut-off groups ("Ferritin < 15" is the reference level).
fit1 <- lm(RLS ~ 1 + Fergroup, data = data_scale)
summary(fit1)

Call:
lm(formula = RLS ~ 1 + Fergroup, data = data_scale)

Residuals:
     Min       1Q   Median       3Q      Max 
-0.08338 -0.08338 -0.07836 -0.07216  0.92784 

Coefficients:
                       Estimate Std. Error t value Pr(>|t|)    
(Intercept)            1.072165   0.006612 162.148   <2e-16 ***
FergroupFerritin 15-30 0.011214   0.009125   1.229    0.219    
FergroupFerritin > 30  0.006200   0.008264   0.750    0.453    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 0.2685 on 6404 degrees of freedom
  (1748 observations deleted due to missingness)
Multiple R-squared:  0.0002361, Adjusted R-squared:  -7.613e-05 
F-statistic: 0.7562 on 2 and 6404 DF,  p-value: 0.4695
Code
# Recode the 1/2-coded numeric RLS back to a Yes/No factor
# (2 -> "Yes", 1 -> "No"), undoing the as.numeric() conversion above.
data_scale <- data_scale %>%
  mutate(
    RLS = factor(case_when(RLS == 2 ~ 1, RLS == 1 ~ 0)) %>%
      fct_recode("Yes" = "1", "No" = "0")
  )

# Barplot: within each ferritin cut-off group, count RLS answers and
# convert the counts to within-group percentages for the label text.
data_scale_per2 <- data_scale %>%
  group_by(Fergroup) %>%
  count(RLS) %>%
  mutate(
    Percentage = round(proportions(n) * 100, 1),
    res = str_c(n, "(", Percentage, ")%")
  )

ggplot(subset(data_scale_per2, !is.na(RLS)), aes(Fergroup, Percentage, fill = RLS)) +
  geom_col(position = "dodge") +
  geom_text(aes(label = res), vjust = -0.2) +
  theme_bw()

Code
# Raw counts (rather than percentages) of RLS status per ferritin group.
ggplot(data_scale, mapping = aes(x = Fergroup, fill = RLS)) +
  geom_bar(position = "dodge") +
  theme_bw()

Code
# t-test + boxplot
# Two-sample t-test of ferritin by RLS status (equal variances assumed).
# Fix: the original line assigned `res` twice (`res <- res <- t.test(...)`),
# a harmless but confusing duplicated-assignment typo.
res <- t.test(Ferritin ~ RLS, data = data_scale, var.equal = TRUE)
res

    Two Sample t-test

data:  Ferritin by RLS
t = -0.68841, df = 6405, p-value = 0.4912
alternative hypothesis: true difference in means between group No and group Yes is not equal to 0
95 percent confidence interval:
 -6.026412  2.893888
sample estimates:
 mean in group No mean in group Yes 
         40.56889          42.13515 
Code
# Boxplot of ferritin by RLS status, annotated with the group means and the
# t-test result computed above.
# NOTE(review): `means` was computed while RLS was still 1/2-coded, so
# means$Ferritin[1]/[2] are in level-code order — verify this matches the
# "No"/"Yes" order of the t.test groups before interpreting the label.
ggplot(subset(data_scale, !is.na(RLS)), aes(x=RLS, y=Ferritin)) +
  geom_boxplot() + 
  theme_bw() +
  geom_text(x = 2.3, y = 700, label=paste('Means:', "  ", means$Ferritin[1], "  ", means$Ferritin[2], "\n", 't(',res$parameter, '), p = ', round(res$p.value, 3), sep = '')) +
  labs(x="RLS", y="Ferritin")

Models

Code
# Logistic (binomial) models of RLS on age, weight, height and time since
# the start of the ferritin-guided intervention, fitted separately for men,
# pre-menopausal women (PostMeno == 0) and post-menopausal women (PostMeno == 1).
RLS_fitM <- glm(formula = RLS ~ Age + Weight + Height + Intervention_months_factor, data = data_scale, subset = data_scale$Gender == "Male" ,family = binomial)
RLS_fit_preF <- glm(formula = RLS ~ Age + Weight + Height + Intervention_months_factor, data = data_scale, subset = data_scale$Gender == "Female" & data_scale$PostMeno == 0 ,family = binomial)
RLS_fit_postF <- glm(formula = RLS ~ Age + Weight + Height + Intervention_months_factor, data = data_scale, subset = data_scale$Gender == "Female" & data_scale$PostMeno == 1 ,family = binomial)

# Human-readable predictor labels for tab_model; the reference category
# (0-5 months) is absorbed into the intercept.
pl <- c(
  `(Intercept)` = "Intercept",
  Intervention_months_factor0 = "0-5 months",
  Intervention_months_factor1 = "6-11 months",
  Intervention_months_factor2 = "12-17 months",
  Intervention_months_factor3 = "18-23 months",
  Intervention_months_factor4 = "24+ months"
)

# Side-by-side odds-ratio table for the three subgroup models.
tab_model(RLS_fit_preF, RLS_fit_postF, RLS_fitM, pred.labels = pl, dv.labels = c("Pre-menopausal women", "Post-menopausal women", "Men"), show.reflvl = T, title = "Restless Legs Syndrome")
Restless Legs Syndrome
  Pre-menopausal women Post-menopausal women Men
Predictors Odds Ratios CI p Odds Ratios CI p Odds Ratios CI p
Intercept 0.09 0.06 – 0.13 <0.001 0.08 0.05 – 0.13 <0.001 0.06 0.04 – 0.08 <0.001
Age 1.03 1.01 – 1.05 0.014 1.02 0.99 – 1.04 0.209 1.01 1.00 – 1.02 0.131
Weight 1.00 0.98 – 1.01 0.891 1.01 0.99 – 1.02 0.494 1.01 1.00 – 1.02 0.209
Height 1.00 0.98 – 1.03 0.881 0.99 0.96 – 1.02 0.444 1.00 0.97 – 1.02 0.701
6-11 months 1.58 0.99 – 2.49 0.052 2.20 1.42 – 3.41 <0.001 1.09 0.70 – 1.64 0.700
12-17 months 0.98 0.57 – 1.65 0.955 0.54 0.28 – 0.97 0.051 0.73 0.45 – 1.14 0.182
18-23 months 2.23 1.34 – 3.63 0.002 1.14 0.53 – 2.23 0.726 1.80 1.12 – 2.80 0.012
24+ months 2.27 1.46 – 3.50 <0.001 2.27 1.37 – 3.71 0.001 1.49 1.01 – 2.16 0.042
Observations 2140 1303 3316
R2 Tjur 0.013 0.026 0.005

Model assumptions

Premenopausal females:

Code
plot(RLS_fit_preF, which = 4, id.n = 3)

Code
#Inspect the largest:
# augment() attaches fitted values, residuals, leverage and Cook's distance
# to every observation used in the model; keep the three largest .cooksd.
model.data <- augment(RLS_fit_preF) %>% 
  mutate(index = 1:n())
model.data %>% top_n(3, .cooksd) #residuals not above 3, no further attention needed 
# A tibble: 3 × 13
  .rownames RLS   Age[,1] Weight[,1] Height[,1] Intervention_months_fa…¹ .fitted
  <chr>     <fct>   <dbl>      <dbl>      <dbl> <fct>                      <dbl>
1 4031      Yes     0.985       36.8       1.24 3                          -1.66
2 6332      Yes    -9.02        11.8     -16.8  2                          -2.74
3 7722      Yes   -21.0         36.8      -1.76 3                          -2.23
# ℹ abbreviated name: ¹​Intervention_months_factor
# ℹ 6 more variables: .resid <dbl>, .hat <dbl>, .sigma <dbl>, .cooksd <dbl>,
#   .std.resid <dbl>, index <int>
Code
#Plot standardized residuals: 
# Index plot of standardized residuals, coloured by RLS status, to spot
# cases with unusually large residuals (|.std.resid| > 3 is checked below).
ggplot(model.data, aes(index, .std.resid)) + 
  geom_point(aes(color = RLS), alpha = .5) +
  theme_bw()

Code
# Filter potential influential data point
# (the empty tibble printed below shows no case exceeds |std. residual| > 3)
model.data %>% 
  filter(abs(.std.resid) > 3) 
# A tibble: 0 × 13
# ℹ 13 variables: .rownames <chr>, RLS <fct>, Age <dbl[,1]>, Weight <dbl[,1]>,
#   Height <dbl[,1]>, Intervention_months_factor <fct>, .fitted <dbl>,
#   .resid <dbl>, .hat <dbl>, .sigma <dbl>, .cooksd <dbl>, .std.resid <dbl>,
#   index <int>
Code
# Plot Cook's Distance with a horizontal line at (K+1)/n * 3 to see which
# observations exceed this threshold (Stevens, 2002); K = 7 predictors, so
# K+1 = 8.
# Fix: n must be the number of observations actually used in THIS model
# (pre-menopausal subset after listwise deletion), not nrow(data_scale) —
# the original denominator counted the full data set and misplaced the
# cutoff line.
cooksd <- cooks.distance(RLS_fit_preF)
n <- length(cooksd)
plot(cooksd, main = "Cooks Distance for Influential Obs")
abline(h = (8 / n) * 3, lty = 2, col = "steelblue") # add cutoff line

Code
# influence plot
car::influencePlot(RLS_fit_preF,col="red",id.n=3)

        StudRes         Hat        CookD
821   2.5083683 0.001718628 0.0047014738
3945 -0.5792803 0.022894442 0.0005401043
4031  1.9384013 0.017930194 0.0121754551
6262  2.5072010 0.002864416 0.0077316809
7722  2.1909050 0.013832408 0.0165800093
Code
car::influenceIndexPlot(RLS_fit_preF, id.n=3)

Code
# check without some outliers
# NOTE(review): negative indices drop rows by POSITION; the diagnostics
# above flagged rownames 4031/6332/7722 (influencePlot: 821/3945/4031/
# 6262/7722), but 4453 and 3788 are removed here — confirm these indices
# identify the intended observations.
data_scale2 <- data_scale[c(-4453, -3788, -7722), ]
RLS_fit_preF_upd1 <- update(RLS_fit_preF, data = data_scale2, subset= data_scale2$Gender == "Female" & data_scale2$PostMeno == 0)
car::compareCoefs(RLS_fit_preF,RLS_fit_preF_upd1) # no improvement
Calls:
1: glm(formula = RLS ~ Age + Weight + Height + Intervention_months_factor, 
  family = binomial, data = data_scale, subset = data_scale$Gender == 
  "Female" & data_scale$PostMeno == 0)
2: glm(formula = RLS ~ Age + Weight + Height + Intervention_months_factor, 
  family = binomial, data = data_scale2, subset = data_scale2$Gender == 
  "Female" & data_scale2$PostMeno == 0)

                              Model 1   Model 2
(Intercept)                    -2.448    -2.439
SE                              0.206     0.206
                                               
Age                            0.0259    0.0278
SE                             0.0105    0.0106
                                               
Weight                      -0.000999 -0.003117
SE                           0.007310  0.007445
                                               
Height                        0.00204   0.00228
SE                            0.01359   0.01368
                                               
Intervention_months_factor1     0.457     0.457
SE                              0.235     0.235
                                               
Intervention_months_factor2   -0.0151   -0.0166
SE                             0.2694    0.2694
                                               
Intervention_months_factor3     0.801     0.762
SE                              0.253     0.256
                                               
Intervention_months_factor4     0.818     0.827
SE                              0.223     0.223
                                               
Code
summary(RLS_fit_preF)

Call:
glm(formula = RLS ~ Age + Weight + Height + Intervention_months_factor, 
    family = binomial, data = data_scale, subset = data_scale$Gender == 
        "Female" & data_scale$PostMeno == 0)

Deviance Residuals: 
    Min       1Q   Median       3Q      Max  
-0.6133  -0.4526  -0.3739  -0.3201   2.5009  

Coefficients:
                             Estimate Std. Error z value Pr(>|z|)    
(Intercept)                 -2.448048   0.206469 -11.857  < 2e-16 ***
Age                          0.025930   0.010544   2.459 0.013923 *  
Weight                      -0.000999   0.007310  -0.137 0.891293    
Height                       0.002035   0.013588   0.150 0.880921    
Intervention_months_factor1  0.456934   0.234656   1.947 0.051505 .  
Intervention_months_factor2 -0.015128   0.269353  -0.056 0.955210    
Intervention_months_factor3  0.800577   0.252922   3.165 0.001549 ** 
Intervention_months_factor4  0.817593   0.222996   3.666 0.000246 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for binomial family taken to be 1)

    Null deviance: 1192.2  on 2139  degrees of freedom
Residual deviance: 1165.0  on 2132  degrees of freedom
  (890 observations deleted due to missingness)
AIC: 1181

Number of Fisher Scoring iterations: 5
Code
summary(RLS_fit_preF_upd1)

Call:
glm(formula = RLS ~ Age + Weight + Height + Intervention_months_factor, 
    family = binomial, data = data_scale2, subset = data_scale2$Gender == 
        "Female" & data_scale2$PostMeno == 0)

Deviance Residuals: 
    Min       1Q   Median       3Q      Max  
-0.6260  -0.4486  -0.3741  -0.3189   2.5137  

Coefficients:
                             Estimate Std. Error z value Pr(>|z|)    
(Intercept)                 -2.438993   0.206326 -11.821  < 2e-16 ***
Age                          0.027829   0.010576   2.631 0.008502 ** 
Weight                      -0.003117   0.007445  -0.419 0.675439    
Height                       0.002279   0.013679   0.167 0.867654    
Intervention_months_factor1  0.456948   0.234694   1.947 0.051536 .  
Intervention_months_factor2 -0.016589   0.269384  -0.062 0.950896    
Intervention_months_factor3  0.761968   0.255774   2.979 0.002891 ** 
Intervention_months_factor4  0.826594   0.223089   3.705 0.000211 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for binomial family taken to be 1)

    Null deviance: 1186.8  on 2136  degrees of freedom
Residual deviance: 1159.5  on 2129  degrees of freedom
  (890 observations deleted due to missingness)
AIC: 1175.5

Number of Fisher Scoring iterations: 5
Code
#multicollinearity
car::vif(RLS_fit_preF) # no VIF value that exceeds 5 or 10 
                               GVIF Df GVIF^(1/(2*Df))
Age                        1.068334  1        1.033602
Weight                     1.222045  1        1.105461
Height                     1.153224  1        1.073883
Intervention_months_factor 1.004374  4        1.000546

Postmenopausal females:

Code
plot(RLS_fit_postF, which = 4, id.n = 3)

Code
#Inspect the largest:
# Per-observation influence measures for the post-menopausal model; keep
# the three observations with the largest Cook's distance.
model.data <- augment(RLS_fit_postF) %>% 
  mutate(index = 1:n())
model.data %>% top_n(3, .cooksd) 
# A tibble: 3 × 13
  .rownames RLS   Age[,1] Weight[,1] Height[,1] Intervention_months_fa…¹ .fitted
  <chr>     <fct>   <dbl>      <dbl>      <dbl> <fct>                      <dbl>
1 1289      Yes      6.98      31.8        3.24 2                          -2.91
2 3919      Yes      5.98       9.81       9.24 3                          -2.37
3 4404      Yes      9.98     -16.2      -59.8  0                          -1.80
# ℹ abbreviated name: ¹​Intervention_months_factor
# ℹ 6 more variables: .resid <dbl>, .hat <dbl>, .sigma <dbl>, .cooksd <dbl>,
#   .std.resid <dbl>, index <int>
Code
#Plot standardized residuals: 
ggplot(model.data, aes(index, .std.resid)) + 
  geom_point(aes(color = RLS), alpha = .5) +
  theme_bw()

Code
# Filter potential influential data point
model.data %>% 
  filter(abs(.std.resid) > 3) 
# A tibble: 0 × 13
# ℹ 13 variables: .rownames <chr>, RLS <fct>, Age <dbl[,1]>, Weight <dbl[,1]>,
#   Height <dbl[,1]>, Intervention_months_factor <fct>, .fitted <dbl>,
#   .resid <dbl>, .hat <dbl>, .sigma <dbl>, .cooksd <dbl>, .std.resid <dbl>,
#   index <int>
Code
# Plot Cook's Distance with a horizontal line at (K+1)/n * 3 (Stevens,
# 2002); K = 7 predictors, so K+1 = 8.
# Fix: n is the number of observations used in THIS model (post-menopausal
# subset after listwise deletion), not nrow(data_scale).
cooksd <- cooks.distance(RLS_fit_postF)
n <- length(cooksd)
plot(cooksd, main = "Cooks Distance for Influential Obs")
abline(h = (8 / n) * 3, lty = 2, col = "steelblue") # add cutoff line

Code
# influence plot
car::influencePlot(RLS_fit_postF,col="red",id.n=3)

       StudRes         Hat       CookD
1032 -0.686794 0.044823103 0.001586396
1268  2.483515 0.004722464 0.011822875
1290  2.492310 0.004082840 0.010503083
3919  2.252302 0.014149332 0.019472978
4404  2.089048 0.071559171 0.062543565
Code
car::influenceIndexPlot(RLS_fit_postF, id.n=3)

Code
# check without some outliers
data_scale2 <- data_scale[c(-3919, -4404), ]
# Fix: the refit must use the SAME subset as RLS_fit_postF, i.e.
# post-menopausal women (PostMeno == 1). The original used PostMeno == 0,
# so "Model 2" in the comparison was actually a pre-menopausal model and
# the "no improvement" conclusion rested on the wrong comparison.
# NOTE(review): negative indices drop rows by position — confirm 3919/4404
# correspond to the flagged rownames.
RLS_fit_postF_upd1 <- update(RLS_fit_postF, data = data_scale2, subset = data_scale2$Gender == "Female" & data_scale2$PostMeno == 1)
car::compareCoefs(RLS_fit_postF, RLS_fit_postF_upd1)
Calls:
1: glm(formula = RLS ~ Age + Weight + Height + Intervention_months_factor, 
  family = binomial, data = data_scale, subset = data_scale$Gender == 
  "Female" & data_scale$PostMeno == 1)
2: glm(formula = RLS ~ Age + Weight + Height + Intervention_months_factor, 
  family = binomial, data = data_scale2, subset = data_scale2$Gender == 
  "Female" & data_scale2$PostMeno == 0)

                              Model 1   Model 2
(Intercept)                    -2.546    -2.448
SE                              0.251     0.206
                                               
Age                            0.0165    0.0259
SE                             0.0131    0.0105
                                               
Weight                       0.005569 -0.000999
SE                           0.008147  0.007310
                                               
Height                       -0.01131   0.00204
SE                            0.01476   0.01359
                                               
Intervention_months_factor1     0.790     0.457
SE                              0.223     0.235
                                               
Intervention_months_factor2   -0.6212   -0.0151
SE                             0.3183    0.2694
                                               
Intervention_months_factor3     0.127     0.801
SE                              0.364     0.253
                                               
Intervention_months_factor4     0.822     0.818
SE                              0.253     0.223
                                               
Code
#multicollinearity
car::vif(RLS_fit_postF) # no VIF value that exceeds 5 or 10 
                               GVIF Df GVIF^(1/(2*Df))
Age                        1.028736  1        1.014266
Weight                     1.140586  1        1.067982
Height                     1.149297  1        1.072053
Intervention_months_factor 1.021225  4        1.002629

Males:

Code
plot(RLS_fitM, which = 4, id.n = 3)

Code
#Inspect the largest:
model.data <- augment(RLS_fitM) %>% 
  mutate(index = 1:n())
model.data %>% top_n(3, .cooksd) 
# A tibble: 3 × 13
  .rownames RLS   Age[,1] Weight[,1] Height[,1] Intervention_months_fa…¹ .fitted
  <chr>     <fct>   <dbl>      <dbl>      <dbl> <fct>                      <dbl>
1 791       Yes      6.98       56.8      -1.76 0                          -2.34
2 2907      Yes    -15.0        46.8       3.24 4                          -2.20
3 4049      Yes    -22.0        56.8       5.24 1                          -2.50
# ℹ abbreviated name: ¹​Intervention_months_factor
# ℹ 6 more variables: .resid <dbl>, .hat <dbl>, .sigma <dbl>, .cooksd <dbl>,
#   .std.resid <dbl>, index <int>
Code
#Plot standardized residuals: 
ggplot(model.data, aes(index, .std.resid)) + 
  geom_point(aes(color = RLS), alpha = .5) +
  theme_bw()

Code
# Filter potential influential data point
model.data %>% 
  filter(abs(.std.resid) > 3) # 0 influential data points
# A tibble: 0 × 13
# ℹ 13 variables: .rownames <chr>, RLS <fct>, Age <dbl[,1]>, Weight <dbl[,1]>,
#   Height <dbl[,1]>, Intervention_months_factor <fct>, .fitted <dbl>,
#   .resid <dbl>, .hat <dbl>, .sigma <dbl>, .cooksd <dbl>, .std.resid <dbl>,
#   index <int>
Code
# Plot Cook's Distance with a horizontal line at (K+1)/n * 3 (Stevens,
# 2002); K = 7 predictors, so K+1 = 8.
# Fix: n is the number of observations used in THIS model (male subset
# after listwise deletion), not nrow(data_scale).
cooksd <- cooks.distance(RLS_fitM)
n <- length(cooksd)
plot(cooksd, main = "Cooks Distance for Influential Obs")
abline(h = (8 / n) * 3, lty = 2, col = "steelblue") # add cutoff line

Code
# influence plot
car::influencePlot(RLS_fitM,col="red",id.n=3)

        StudRes         Hat        CookD
791   2.2287712 0.010709142 0.0141342181
1294 -0.3135131 0.023596520 0.0001538936
1302  2.6051234 0.002345544 0.0081914267
3057 -0.5579593 0.022685414 0.0004933534
4049  2.3021371 0.011519861 0.0179545641
7085  2.6056903 0.003212045 0.0111192775
Code
car::influenceIndexPlot(RLS_fitM, id.n=3)

Code
#multicollinearity
car::vif(RLS_fitM) # no VIF value that exceeds 5 or 10 
                               GVIF Df GVIF^(1/(2*Df))
Age                        1.078175  1        1.038352
Weight                     1.240648  1        1.113844
Height                     1.242826  1        1.114821
Intervention_months_factor 1.009696  4        1.001207

Cognitive functioning

Scoring: https://meetinstrumentenzorg.nl/wp-content/uploads/instrumenten/CFQ-form.pdf

Predict by ferritin

Code
# Pearson correlation between cognitive functioning (CFQ total; higher is
# worse) and ferritin, annotated on a scatter plot.
res <- cor.test(data_scale$CFQ_Total, data_scale$Ferritin,  method = "pearson", use = "complete.obs")
ggplot(data_scale, mapping = aes(x = CFQ_Total, y = Ferritin)) + 
  geom_point() + 
  theme_bw() +
  geom_text(x = 75, y = 1000, label=paste('r(',res$parameter,') = ', round(res$estimate,3),', p = ', round(res$p.value, 3), sep = '')) +
  labs(x="Cognitive functioning, higher is worse", y="Ferritin")

Code
# Rebuild the ferritin cut-off grouping (same definition as in the data
# preparation step) and plot the CFQ score against it.
data_scale <- data_scale %>%
  mutate(
    Fergroup = factor(
      case_when(
        Ferritin < 15                   ~ 1,
        Ferritin >= 15 & Ferritin <= 30 ~ 2,
        Ferritin > 30                   ~ 3
      )
    ) %>%
      fct_recode("Ferritin < 15" = "1", "Ferritin 15-30" = "2", "Ferritin > 30" = "3")
  )

ggplot(data_scale, aes(x = Fergroup, y = CFQ_Total)) +
  geom_boxplot() +
  theme_bw() +
  labs(x="Ferritin cut-off groups", y="Cognitive functioning, higher is worse")

Models

Code
CFQ_fitM <- lm(formula = CFQ_Total ~ Age + Weight + Height + Intervention_months_factor, data = data_scale, subset = data_scale$Gender == "Male")
CFQ_fit_preF <- lm(formula = CFQ_Total ~ Age + Weight + Height + Intervention_months_factor, data = data_scale, subset = data_scale$Gender == "Female" & data_scale$PostMeno == 0)
CFQ_fit_postF <- lm(formula = CFQ_Total ~ Age + Weight + Height + Intervention_months_factor, data = data_scale, subset = data_scale$Gender == "Female" & data_scale$PostMeno == 1)

pl <- c(
  `(Intercept)` = "Intercept",
  Intervention_months_factor0 = "0-5 months",
  Intervention_months_factor1 = "6-11 months",
  Intervention_months_factor2 = "12-17 months",
  Intervention_months_factor3 = "18-23 months",
  Intervention_months_factor4 = "24+ months"
)

tab_model(CFQ_fit_preF, CFQ_fit_postF, CFQ_fitM, pred.labels = pl, dv.labels = c("Pre-menopausal women", "Post-menopausal women", "Men"), show.reflvl = T, title = "Cognitive functioning")
Cognitive functioning
  Pre-menopausal women Post-menopausal women Men
Predictors Estimates CI p Estimates CI p Estimates CI p
Intercept 27.27 25.92 – 28.62 <0.001 28.03 26.52 – 29.54 <0.001 25.51 24.85 – 26.17 <0.001
Age -0.25 -0.32 – -0.18 <0.001 -0.16 -0.25 – -0.08 <0.001 -0.14 -0.17 – -0.11 <0.001
Weight 0.01 -0.04 – 0.06 0.754 0.02 -0.04 – 0.07 0.570 -0.01 -0.04 – 0.02 0.569
Height -0.09 -0.17 – 0.00 0.062 0.01 -0.09 – 0.11 0.801 -0.08 -0.14 – -0.02 0.007
6-11 months 0.05 -1.48 – 1.58 0.951 0.70 -0.94 – 2.34 0.402 0.25 -0.87 – 1.37 0.657
12-17 months -0.97 -2.49 – 0.54 0.209 1.05 -0.49 – 2.59 0.181 0.77 -0.30 – 1.83 0.159
18-23 months -0.68 -2.58 – 1.23 0.484 2.11 -0.30 – 4.53 0.087 -0.66 -2.15 – 0.82 0.381
24+ months 1.57 -0.07 – 3.21 0.060 1.04 -0.96 – 3.03 0.307 0.15 -0.99 – 1.29 0.795
Observations 2342 1464 3534
R2 / R2 adjusted 0.025 / 0.022 0.013 / 0.009 0.036 / 0.034

Assumptions

Premenopausal females:

Code
### LINEARITY (Linearity and constant variance assumptions)
#Fitted versus Residuals Plot: Linearity ok,  constant variance meh (heteroscedasticity)
plot(CFQ_fit_preF, which = 1)

Code
### HOMOGENEITY OF VARIANCE
# Display scale-location plot (also known as  spread-location plot)
plot(CFQ_fit_preF, which = 3) # Linearity good, don't have constant variance 

Code
# formal test in the form of Breusch-Pagan 
bptest(CFQ_fit_preF) # small p-value, we reject the null of homoscedasticity. The constant variance assumption is violated.

    studentized Breusch-Pagan test

data:  CFQ_fit_preF
BP = 36.591, df = 7, p-value = 5.607e-06
Code
### NORMALITY OF RESIDUALS
#QQ plot
plot(CFQ_fit_preF, 2) #suspect

Code
# formal test in the form of Shapiro-Wilk
# p < 2.2e-16: the null of normally distributed residuals is REJECTED
# (the original comment stated the opposite conclusion).
shapiro.test(resid(CFQ_fit_preF)) # reject normality of residuals

    Shapiro-Wilk normality test

data:  resid(CFQ_fit_preF)
W = 0.98238, p-value < 2.2e-16
Code
### OUTLIERS
#plot 3largest
plot(CFQ_fit_preF, which = 4, id.n = 10)

Code
# inspect the largest
# Three observations with the largest Cook's distance; the standardized-
# residual filter below lists all cases exceeding |3|.
model.data <- augment(CFQ_fit_preF) %>% 
  mutate(index = 1:n())
model.data %>% top_n(3, .cooksd) # residuals are above 3, does deserve closer attention
# A tibble: 3 × 13
  .rownames CFQ_Total Age[,1] Weight[,1] Height[,1] Intervention_months_factor
  <chr>         <dbl>   <dbl>      <dbl>      <dbl> <fct>                     
1 275              60   -6.02       49.8       5.24 2                         
2 4065             73  -17.0        35.8     -10.8  1                         
3 7774             82  -22.0       -20.2      -6.76 3                         
# ℹ 7 more variables: .fitted <dbl>, .resid <dbl>, .hat <dbl>, .sigma <dbl>,
#   .cooksd <dbl>, .std.resid <dbl>, index <int>
Code
# Plot standardized residuals
ggplot(model.data, aes(index, .std.resid)) + 
  geom_point(aes(color = CFQ_Total), alpha = .5) +
  theme_bw()

Code
# Filter potential influential data point
model.data %>% 
  filter(abs(.std.resid) > 3) # some influential data points
# A tibble: 19 × 13
   .rownames CFQ_Total Age[,1] Weight[,1] Height[,1] Intervention_months_factor
   <chr>         <dbl>   <dbl>      <dbl>      <dbl> <fct>                     
 1 671              78  -18.0      -17.2     -12.8   0                         
 2 1146             76  -17.0      -23.2      -4.76  1                         
 3 1366             87  -15.0      -11.2      -9.76  0                         
 4 1414             77  -23.0       -8.19      0.240 2                         
 5 1776             70  -16.0        8.81      3.24  2                         
 6 3168             72  -14.0       19.8      -0.760 2                         
 7 3514             86  -17.0      -15.2     -11.8   4                         
 8 3611             92  -18.0      -13.2      -8.76  4                         
 9 3662             83  -17.0      -23.2     -15.8   4                         
10 3812             82  -17.0       14.8      -8.76  0                         
11 4065             73  -17.0       35.8     -10.8   1                         
12 4595             78  -16.0        6.81      1.24  0                         
13 5244             92  -21.0      -20.2      -8.76  2                         
14 5572             72   -1.02      -3.19     -1.76  4                         
15 6447             75  -14.0       -8.19      6.24  0                         
16 6780             74  -18.0        6.81    -10.8   4                         
17 6859             72  -16.0      -18.2      -8.76  3                         
18 7737             73  -19.0       -6.19     -2.76  3                         
19 7774             82  -22.0      -20.2      -6.76  3                         
# ℹ 7 more variables: .fitted <dbl>, .resid <dbl>, .hat <dbl>, .sigma <dbl>,
#   .cooksd <dbl>, .std.resid <dbl>, index <int>
Code
#Cook's distance
cooksd <- cooks.distance(CFQ_fit_preF)
# Plot Cook's Distance with a horizontal line at (K+1)/n * 3 to see which
# observations exceed this threshold (Stevens, 2002); K = 7 predictors.
# Fix: n is the number of observations used in the fitted model, not the
# row count of the full data set.
n <- length(cooksd)
plot(cooksd, main = "Cooks Distance for Influential Obs")
abline(h = (8/n) * 3, lty = 2, col = "steelblue") # add cutoff line

Code
# (log) transform to pass normality. 
# log(0) yields -Inf for donors with CFQ_Total == 0 (the 29 -Inf entries in
# the table below); these are replaced with NA in the next chunk.
data_scale2 <- data_scale
data_scale2$CFQ_Total <- log(data_scale2$CFQ_Total)
table(data_scale2$CFQ_Total)

             -Inf                 0 0.693147180559945  1.09861228866811 
               29                20                29                34 
 1.38629436111989   1.6094379124341  1.79175946922805  1.94591014905531 
               31                66                53                73 
 2.07944154167984  2.19722457733622  2.30258509299405  2.39789527279837 
               86                91               108               121 
   2.484906649788  2.56494935746154  2.63905732961526  2.70805020110221 
              147               143               150               180 
 2.77258872223978  2.83321334405622  2.89037175789616  2.94443897916644 
              193               199               229               246 
 2.99573227355399  3.04452243772342  3.09104245335832  3.13549421592915 
              230               266               225               290 
 3.17805383034795   3.2188758248682  3.25809653802148  3.29583686600433 
              262               293               273               269 
  3.3322045101752  3.36729582998647  3.40119738166216  3.43398720448515 
              265               279               252               236 
 3.46573590279973  3.49650756146648  3.52636052461616  3.55534806148941 
              190               218               205               175 
 3.58351893845611  3.61091791264422  3.63758615972639  3.66356164612965 
              176               162               130               111 
 3.68887945411394  3.71357206670431  3.73766961828337  3.76120011569356 
              130               113                84               105 
 3.78418963391826  3.80666248977032   3.8286413964891  3.85014760171006 
               60                76                62                59 
 3.87120101090789  3.89182029811063  3.91202300542815  3.93182563272433 
               50                44                45                35 
 3.95124371858143  3.97029191355212  3.98898404656427  4.00733318523247 
               25                36                21                18 
 4.02535169073515  4.04305126783455  4.06044301054642  4.07753744390572 
               19                17                19                18 
  4.0943445622221  4.11087386417331  4.12713438504509  4.14313472639153 
               15                17                 8                 9 
 4.15888308335967  4.17438726989564  4.18965474202643  4.20469261939097 
                5                 3                 9                 7 
 4.21950770517611  4.23410650459726  4.24849524204936  4.26267987704132 
                5                 2                 3                 4 
 4.27666611901606  4.29045944114839  4.30406509320417  4.31748811353631 
                4                 2                 1                 1 
 4.33073334028633  4.34380542185368  4.35670882668959  4.36944785246702 
                1                 2                 2                 1 
 4.39444915467244  4.40671924726425   4.4188406077966  4.45434729625351 
                1                 2                 1                 1 
 4.46590811865458  4.52178857704904 
                1                 2 
Code
data_scale2[data_scale2 <= -Inf ] <- NA  # Replace -Inf with NA
table(data_scale2$CFQ_Total)

                0 0.693147180559945  1.09861228866811  1.38629436111989 
               20                29                34                31 
  1.6094379124341  1.79175946922805  1.94591014905531  2.07944154167984 
               66                53                73                86 
 2.19722457733622  2.30258509299405  2.39789527279837    2.484906649788 
               91               108               121               147 
 2.56494935746154  2.63905732961526  2.70805020110221  2.77258872223978 
              143               150               180               193 
 2.83321334405622  2.89037175789616  2.94443897916644  2.99573227355399 
              199               229               246               230 
 3.04452243772342  3.09104245335832  3.13549421592915  3.17805383034795 
              266               225               290               262 
  3.2188758248682  3.25809653802148  3.29583686600433   3.3322045101752 
              293               273               269               265 
 3.36729582998647  3.40119738166216  3.43398720448515  3.46573590279973 
              279               252               236               190 
 3.49650756146648  3.52636052461616  3.55534806148941  3.58351893845611 
              218               205               175               176 
 3.61091791264422  3.63758615972639  3.66356164612965  3.68887945411394 
              162               130               111               130 
 3.71357206670431  3.73766961828337  3.76120011569356  3.78418963391826 
              113                84               105                60 
 3.80666248977032   3.8286413964891  3.85014760171006  3.87120101090789 
               76                62                59                50 
 3.89182029811063  3.91202300542815  3.93182563272433  3.95124371858143 
               44                45                35                25 
 3.97029191355212  3.98898404656427  4.00733318523247  4.02535169073515 
               36                21                18                19 
 4.04305126783455  4.06044301054642  4.07753744390572   4.0943445622221 
               17                19                18                15 
 4.11087386417331  4.12713438504509  4.14313472639153  4.15888308335967 
               17                 8                 9                 5 
 4.17438726989564  4.18965474202643  4.20469261939097  4.21950770517611 
                3                 9                 7                 5 
 4.23410650459726  4.24849524204936  4.26267987704132  4.27666611901606 
                2                 3                 4                 4 
 4.29045944114839  4.30406509320417  4.31748811353631  4.33073334028633 
                2                 1                 1                 1 
 4.34380542185368  4.35670882668959  4.36944785246702  4.39444915467244 
                2                 2                 1                 1 
 4.40671924726425   4.4188406077966  4.45434729625351  4.46590811865458 
                2                 1                 1                 1 
 4.52178857704904 
                2 
Code
# Refit the pre-menopausal CFQ model on the log-transformed outcome to see
# whether the transform rescues the normality-of-residuals assumption.
# Fix: Intervention_months_factor is already a factor, so the redundant
# as.factor() wrapper is dropped; it only cluttered the coefficient names.
CFQFfactor_fix <- lm(CFQ_Total ~ Age + Weight + Height + Intervention_months_factor,
                     data = data_scale2,
                     subset = data_scale2$Gender == "Female" & data_scale2$PostMeno == 0)

# QQ plot of the residuals after the log transform
qqnorm(resid(CFQFfactor_fix), col = "grey")
qqline(resid(CFQFfactor_fix), col = "dodgerblue", lwd = 2)

Code
shapiro.test(resid(CFQFfactor_fix))

    Shapiro-Wilk normality test

data:  resid(CFQFfactor_fix)
W = 0.9088, p-value < 2.2e-16
Code
## Transforming does not seem to help. Many outliers. Solution -> robust regression, see below.

#multicollinearity
car::vif(CFQ_fit_preF) # no VIF value that exceeds 5 or 10 
                               GVIF Df GVIF^(1/(2*Df))
Age                        1.065997  1        1.032471
Weight                     1.201466  1        1.096114
Height                     1.134895  1        1.065314
Intervention_months_factor 1.003608  4        1.000450

Postmenopausal females:

Code
#### FEMALE post-menopausal: regression assumption checks
### LINEARITY (linearity and constant-variance assumptions)
# Fitted-versus-residuals plot: linearity looks good; spread looks roughly
# constant (no obvious heteroscedasticity).
plot(CFQ_fit_postF, which = 1)

Code
### HOMOGENEITY OF VARIANCE
# Scale-location (spread-location) plot: residual spread vs fitted values.
plot(CFQ_fit_postF, which = 3) # roughly flat line suggests constant variance

Code
# Formal heteroscedasticity test (Breusch-Pagan, lmtest::bptest).
bptest(CFQ_fit_postF) # p > 0.05: fail to reject homoscedasticity; constant-variance assumption not violated

    studentized Breusch-Pagan test

data:  CFQ_fit_postF
BP = 12.905, df = 7, p-value = 0.07445
Code
### NORMALITY OF RESIDUALS
# QQ plot of residuals against the normal quantiles.
plot(CFQ_fit_postF, 2) # tails deviate from the line: normality is suspect

Code
# Formal Shapiro-Wilk normality test.
shapiro.test(resid(CFQ_fit_postF)) # p < 0.05 (see output): we REJECT normality of the residuals

    Shapiro-Wilk normality test

data:  resid(CFQ_fit_postF)
W = 0.99081, p-value = 6.045e-08
Code
### OUTLIERS
# Cook's distance plot, labelling the 3 most influential observations.
plot(CFQ_fit_postF, which = 4, id.n = 3)

Code
# Attach per-observation diagnostics (broom::augment) and show the three
# observations with the largest Cook's distance for closer inspection.
model.data <- CFQ_fit_postF %>%
  augment() %>%
  mutate(index = seq_len(n()))
model.data %>% top_n(3, .cooksd)
# A tibble: 3 × 13
  .rownames CFQ_Total Age[,1] Weight[,1] Height[,1] Intervention_months_factor
  <chr>         <dbl>   <dbl>      <dbl>      <dbl> <fct>                     
1 3699              2    3.98      10.8       -3.76 3                         
2 3940              0    6.98     -19.2      -22.8  3                         
3 4433             79    3.98      -8.19     -12.8  4                         
# ℹ 7 more variables: .fitted <dbl>, .resid <dbl>, .hat <dbl>, .sigma <dbl>,
#   .cooksd <dbl>, .std.resid <dbl>, index <int>
Code
# Standardized residuals by row index, coloured by the outcome value.
ggplot(model.data, aes(x = index, y = .std.resid)) +
  geom_point(aes(color = CFQ_Total), alpha = 0.5) +
  theme_bw()

Code
# List observations whose standardized residual exceeds 3 in magnitude
# (potentially influential points).
model.data %>%
  filter(.std.resid > 3 | .std.resid < -3)
# A tibble: 4 × 13
  .rownames CFQ_Total Age[,1] Weight[,1] Height[,1] Intervention_months_factor
  <chr>         <dbl>   <dbl>      <dbl>      <dbl> <fct>                     
1 695              72   15.0      11.8        -6.76 0                         
2 1869             61   11.0       7.81       -7.76 2                         
3 4433             79    3.98     -8.19      -12.8  4                         
4 4695             60   16.0       0.811      -4.76 2                         
# ℹ 7 more variables: .fitted <dbl>, .resid <dbl>, .hat <dbl>, .sigma <dbl>,
#   .cooksd <dbl>, .std.resid <dbl>, index <int>
Code
# Cook's distance for every observation in the post-menopausal CFQ model.
cooksd <- cooks.distance(CFQ_fit_postF)
# Cutoff line at 3 * (k + 1) / n, where k = 7 predictors so k + 1 = 8.
# NOTE(review): n is taken from the full data set, not the post-menopausal
# subset the model was fitted on -- confirm this is intended.
n <- nrow(data_scale)
plot(cooksd, main = "Cooks Distance for Influential Obs")
abline(h = (8/n) * 3, lty = 2, col = "steelblue") # add cutoff line

Code
# Refit on data_scale2 to re-check residual normality.
# NOTE(review): no log transform of CFQ_Total is visible in this chunk despite
# the original "log transform" note -- confirm whether the transform was
# applied to data_scale2 elsewhere.
CFQ_postF_factor_fix <- lm(CFQ_Total~ Age + Weight + Height + as.factor(Intervention_months_factor), 
                     data = data_scale2, 
                     subset = data_scale2$Gender == "Female" & data_scale2$PostMeno == 1)

# QQ plot of the refitted model's residuals.
qqnorm(resid(CFQ_postF_factor_fix), col = "grey")
qqline(resid(CFQ_postF_factor_fix), col = "dodgerblue", lwd = 2)

Code
# Shapiro-Wilk normality test on the refitted model's residuals.
shapiro.test(resid(CFQ_postF_factor_fix))

    Shapiro-Wilk normality test

data:  resid(CFQ_postF_factor_fix)
W = 0.93056, p-value < 2.2e-16
Code
## Transformation does not fix non-normality and many outliers remain,
## so we fall back to robust regression (see the robust-models chunk below).

# Multicollinearity check (generalized VIF for the factor term).
car::vif(CFQ_fit_postF) # no GVIF value exceeds the usual 5 or 10 cutoffs
                               GVIF Df GVIF^(1/(2*Df))
Age                        1.035602  1        1.017645
Weight                     1.153175  1        1.073860
Height                     1.177418  1        1.085089
Intervention_months_factor 1.012313  4        1.001531

Males:

Code
### MALE: regression assumption checks
### LINEARITY (linearity and constant-variance assumptions)
# Fitted-versus-residuals plot: looks good.
plot(CFQ_fitM, which = 1)      

Code
### HOMOGENEITY OF VARIANCE
# Scale-location (spread-location) plot: residual spread vs fitted values.
plot(CFQ_fitM, which = 3) # note the odd pattern in the lower band

Code
# Formal heteroscedasticity test (Breusch-Pagan, lmtest::bptest).
bptest(CFQ_fitM) # small p-value: reject homoscedasticity; constant-variance assumption is violated

    studentized Breusch-Pagan test

data:  CFQ_fitM
BP = 63.672, df = 7, p-value = 2.779e-11
Code
### NORMALITY OF RESIDUALS
# QQ plot of residuals against the normal quantiles.
plot(CFQ_fitM, 2) # tails deviate from the line: normality is suspect

Code
# Formal Shapiro-Wilk normality test.
shapiro.test(resid(CFQ_fitM)) # p < 0.05 (see output): we REJECT normality of the residuals

    Shapiro-Wilk normality test

data:  resid(CFQ_fitM)
W = 0.98849, p-value = 2.327e-16
Code
### OUTLIERS
# Cook's distance plot, labelling the 3 most influential observations.
plot(CFQ_fitM, which = 4, id.n = 3)

Code
# Attach per-observation diagnostics (broom::augment) and show the three
# observations with the largest Cook's distance for closer inspection.
model.data <- CFQ_fitM %>%
  augment() %>%
  mutate(index = seq_len(n()))
model.data %>% top_n(3, .cooksd)
# A tibble: 3 × 13
  .rownames CFQ_Total Age[,1] Weight[,1] Height[,1] Intervention_months_factor
  <chr>         <dbl>   <dbl>      <dbl>      <dbl> <fct>                     
1 897              66   -9.02      66.8       23.2  2                         
2 2736             65  -18.0       56.8       10.2  0                         
3 5561             66   29.0       -7.19      -5.76 4                         
# ℹ 7 more variables: .fitted <dbl>, .resid <dbl>, .hat <dbl>, .sigma <dbl>,
#   .cooksd <dbl>, .std.resid <dbl>, index <int>
Code
# Standardized residuals by row index, coloured by the outcome value.
ggplot(model.data, aes(x = index, y = .std.resid)) +
  geom_point(aes(color = CFQ_Total), alpha = 0.5) +
  theme_bw()

Code
# List observations whose standardized residual exceeds 3 in magnitude
# (potentially influential points).
model.data %>%
  filter(.std.resid > 3 | .std.resid < -3)
# A tibble: 22 × 13
   .rownames CFQ_Total Age[,1] Weight[,1] Height[,1] Intervention_months_factor
   <chr>         <dbl>   <dbl>      <dbl>      <dbl> <fct>                     
 1 309              59    5.98      11.8        1.24 0                         
 2 897              66   -9.02      66.8       23.2  2                         
 3 1214             61   15.0       13.8        5.24 1                         
 4 1266             59    6.98       1.81      12.2  2                         
 5 1730             81    4.98     -14.2       -3.76 0                         
 6 2505             61   11.0       -3.19      -2.76 3                         
 7 2736             65  -18.0       56.8       10.2  0                         
 8 2756             67  -24.0       -8.19     -11.8  0                         
 9 2879             68  -18.0        1.81       8.24 4                         
10 3045             61  -18.0       11.8       18.2  4                         
# ℹ 12 more rows
# ℹ 7 more variables: .fitted <dbl>, .resid <dbl>, .hat <dbl>, .sigma <dbl>,
#   .cooksd <dbl>, .std.resid <dbl>, index <int>
Code
# Cook's distance for every observation in the male CFQ model.
cooksd <- cooks.distance(CFQ_fitM)
# Cutoff line at 3 * (k + 1) / n, where k = 7 predictors so k + 1 = 8.
# NOTE(review): n is taken from the full data set, not the male subset the
# model was fitted on -- confirm this is intended.
n <- nrow(data_scale)
plot(cooksd, main = "Cooks Distance for Influential Obs")
abline(h = (8/n) * 3, lty = 2, col = "steelblue") # add cutoff line

Code
# Refit the male model on data_scale2 to re-check residual normality.
# NOTE(review): no log transform of CFQ_Total is visible in this chunk despite
# the original "log transform" note -- confirm whether the transform was
# applied to data_scale2 elsewhere.
CFQMfactor_fix <- lm(CFQ_Total~ Age + Weight + Height + as.factor(Intervention_months_factor), 
                     data = data_scale2, 
                     subset = data_scale2$Gender == "Male")

# QQ plot of the refitted model's residuals.
qqnorm(resid(CFQMfactor_fix), col = "grey")
qqline(resid(CFQMfactor_fix), col = "dodgerblue", lwd = 2)

Code
# Shapiro-Wilk normality test on the refitted model's residuals.
shapiro.test(resid(CFQMfactor_fix))

    Shapiro-Wilk normality test

data:  resid(CFQMfactor_fix)
W = 0.89562, p-value < 2.2e-16
Code
## Transformation does not fix non-normality and many outliers remain,
## so we fall back to robust regression (see the robust-models chunk below).

# Multicollinearity check (generalized VIF for the factor term).
car::vif(CFQ_fitM) # no GVIF value exceeds the usual 5 or 10 cutoffs
                               GVIF Df GVIF^(1/(2*Df))
Age                        1.091034  1        1.044526
Weight                     1.270075  1        1.126976
Height                     1.250205  1        1.118126
Intervention_months_factor 1.007324  4        1.000913
Decision

We conclude that the assumptions were violated and thus we use robust models.

Robust models

Code
# Assumptions were violated for the OLS fits, so refit each subgroup with the
# robust MM-estimator robustbase::lmrob. All three models share one formula,
# defined once so the specification cannot drift between subgroups.
cfq_formula <- CFQ_Total ~ Age + Weight + Height + as.factor(Intervention_months_factor)

CFQ_preF_robust <- lmrob(cfq_formula, data = data_scale,
                         subset = data_scale$Gender == "Female" & data_scale$PostMeno == 0)
CFQ_postF_robust <- lmrob(cfq_formula, data = data_scale,
                          subset = data_scale$Gender == "Female" & data_scale$PostMeno == 1)
CFQ_M_robust <- lmrob(cfq_formula, data = data_scale,
                      subset = data_scale$Gender == "Male")

# Human-readable labels for the intervention-exposure dummies in the table.
pl <- c(
  `(Intercept)` = "Intercept",
  `as.factor(Intervention_months_factor)1` = "6-12 months",
  `as.factor(Intervention_months_factor)2` = "12-18 months",
  `as.factor(Intervention_months_factor)3` = "18-24 months",
  `as.factor(Intervention_months_factor)4` = "24+ months"
)

# Side-by-side regression table (sjPlot::tab_model) for the three subgroups.
tab_model(CFQ_preF_robust, CFQ_postF_robust, CFQ_M_robust, pred.labels = pl, dv.labels = c("Pre-menopausal women", "Post-menopausal women", "Men"), show.reflvl = TRUE, title = "Cognitive functioning, higher means more cognitive failure", digits = 3)
Cognitive functioning, higher means more cognitive failure
  Pre-menopausal women Post-menopausal women Men
Predictors Estimates CI p Estimates CI p Estimates CI p
Intercept 27.197 25.990 – 28.404 <0.001 27.331 25.745 – 28.918 <0.001 25.049 24.365 – 25.732 <0.001
Age -0.210 -0.280 – -0.141 <0.001 -0.148 -0.240 – -0.056 0.002 -0.126 -0.153 – -0.098 <0.001
Weight -0.005 -0.053 – 0.042 0.826 0.000 -0.053 – 0.053 0.997 -0.021 -0.059 – 0.016 0.269
Height -0.073 -0.161 – 0.015 0.102 0.014 -0.087 – 0.114 0.791 -0.069 -0.129 – -0.008 0.026
6-12 months 0.144 -1.354 – 1.642 0.851 0.873 -0.767 – 2.512 0.297 0.047 -1.037 – 1.130 0.933
12-18 months -1.458 -2.942 – 0.025 0.054 1.084 -0.502 – 2.670 0.180 0.807 -0.285 – 1.898 0.148
18-24 months -0.924 -2.617 – 0.769 0.285 2.588 -0.055 – 5.231 0.055 -0.591 -2.138 – 0.955 0.454
24+ months 0.866 -0.823 – 2.555 0.315 0.859 -1.123 – 2.842 0.395 -0.238 -1.375 – 0.900 0.682
Observations 2342 1464 3534
R2 / R2 adjusted 0.022 / 0.019 0.012 / 0.007 0.033 / 0.031

Physical health

Scored by https://meetinstrumentenzorg.nl/wp-content/uploads/instrumenten/SF-36-RAND-36-form.pdf

Predict by ferritin

Code
# Pearson correlation between physical health (SF-36) and ferritin, shown as
# an annotated scatterplot.
res <- cor.test(data_scale$SF_ph, data_scale$Ferritin, method = "pearson", use = "complete.obs")
cor_label <- paste0("r(", res$parameter, ") = ", round(res$estimate, 3),
                    ", p = ", round(res$p.value, 3))
ggplot(data_scale, aes(x = SF_ph, y = Ferritin)) +
  geom_point() +
  theme_bw() +
  geom_text(x = 75, y = 1000, label = cor_label) +
  labs(x = "Physical health (SF-36), higher is better", y = "Ferritin")

Code
# Group ferritin into the clinical cutoff bands (<15, 15-30, >30).
# NOTE(review): Fergroup was already derived identically when data_clean was
# built; this recomputation appears redundant -- confirm.
data_scale <- data_scale %>% mutate(Fergroup = factor(case_when(Ferritin < 15 ~ 1, Ferritin >= 15 & Ferritin <= 30~ 2, Ferritin > 30 ~ 3)) %>%  fct_recode("Ferritin < 15" = "1", "Ferritin 15-30" = "2", "Ferritin > 30" = "3"))

# Physical-health distribution per ferritin cutoff group.
ggplot(data_scale, aes(x=Fergroup, y=SF_ph)) +
  geom_boxplot() + 
  theme_bw() +
  labs(x="Ferritin cut-off groups", y="Physical health (SF-36), higher is better")

Models

Code
# OLS models for physical health (SF-36), fitted per subgroup. One shared
# formula keeps the specification identical across the three fits.
sf_ph_formula <- SF_ph ~ Age + Weight + Height + as.factor(Intervention_months_factor)

SF_ph_preF <- lm(sf_ph_formula, data = data_scale,
                 subset = data_scale$Gender == "Female" & data_scale$PostMeno == 0)
SF_ph_postF <- lm(sf_ph_formula, data = data_scale,
                  subset = data_scale$Gender == "Female" & data_scale$PostMeno == 1)
SF_ph_M <- lm(sf_ph_formula, data = data_scale,
              subset = data_scale$Gender == "Male")

# Human-readable labels for the intervention-exposure dummies in the table.
pl <- c(
  `(Intercept)` = "Intercept",
  `as.factor(Intervention_months_factor)1` = "6-12 months",
  `as.factor(Intervention_months_factor)2` = "12-18 months",
  `as.factor(Intervention_months_factor)3` = "18-24 months",
  `as.factor(Intervention_months_factor)4` = "24+ months"
)

# Side-by-side regression table (sjPlot::tab_model) for the three subgroups.
tab_model(SF_ph_preF, SF_ph_postF, SF_ph_M, pred.labels = pl, dv.labels = c("Pre-menopausal women", "Post-menopausal women", "Men"), title = "Physical health (Sf-36), higher score indicates better health", show.reflvl = TRUE, digits=3)
Physical health (Sf-36), higher score indicates better health
  Pre-menopausal women Post-menopausal women Men
Predictors Estimates CI p Estimates CI p Estimates CI p
Intercept 90.543 89.548 – 91.539 <0.001 88.301 86.818 – 89.785 <0.001 91.230 90.733 – 91.728 <0.001
Age 0.079 0.026 – 0.132 0.004 -0.025 -0.110 – 0.060 0.562 -0.031 -0.050 – -0.011 0.002
Weight -0.154 -0.190 – -0.118 <0.001 -0.190 -0.242 – -0.137 <0.001 -0.113 -0.138 – -0.087 <0.001
Height 0.146 0.080 – 0.212 <0.001 0.048 -0.047 – 0.144 0.321 0.117 0.073 – 0.162 <0.001
6-12 months -0.203 -1.329 – 0.922 0.723 0.435 -1.167 – 2.038 0.594 0.099 -0.744 – 0.942 0.818
12-18 months 0.329 -0.783 – 1.441 0.562 -1.258 -2.771 – 0.255 0.103 -0.538 -1.344 – 0.269 0.191
18-24 months -0.479 -1.878 – 0.920 0.502 -1.913 -4.272 – 0.446 0.112 -0.088 -1.210 – 1.033 0.877
24+ months -0.496 -1.703 – 0.712 0.421 0.985 -0.977 – 2.947 0.325 -0.015 -0.879 – 0.848 0.972
Observations 2398 1468 3582
R2 / R2 adjusted 0.031 / 0.028 0.039 / 0.035 0.031 / 0.029

Assumptions

Premenopausal females:

Code
### LINEARITY (linearity and constant-variance assumptions)
# Fitted-versus-residuals plot: linearity looks poor; variance is questionable
# (possible heteroscedasticity).
plot(SF_ph_preF,  which = 1)

Code
### HOMOGENEITY OF VARIANCE
# Scale-location (spread-location) plot: residual spread vs fitted values.
plot(SF_ph_preF, which = 3) # linearity poor, constant variance questionable

Code
# Formal heteroscedasticity test (Breusch-Pagan, lmtest::bptest).
bptest(SF_ph_preF) # small p-value: reject homoscedasticity; constant-variance assumption is violated

    studentized Breusch-Pagan test

data:  SF_ph_preF
BP = 25.899, df = 7, p-value = 0.0005251
Code
### NORMALITY OF RESIDUALS
# QQ plot of residuals against the normal quantiles.
plot(SF_ph_preF, 2) # clear deviation from the line: normality is suspect

Code
# Formal Shapiro-Wilk normality test.
shapiro.test(resid(SF_ph_preF)) # p < 0.05 (see output): we reject normality of the residuals

    Shapiro-Wilk normality test

data:  resid(SF_ph_preF)
W = 0.79775, p-value < 2.2e-16
Code
### OUTLIERS
# Cook's distance plot, labelling the 3 most influential observations.
plot(SF_ph_preF,  which = 4, id.n = 3)

Code
# Attach per-observation diagnostics (broom::augment) and show the three
# observations with the largest Cook's distance for closer inspection.
model.data <- SF_ph_preF %>%
  augment() %>%
  mutate(index = seq_len(n()))
model.data %>% top_n(3, .cooksd)
# A tibble: 3 × 13
  .rownames SF_ph Age[,1] Weight[,1] Height[,1] as.factor(Intervention…¹ .fitted
  <chr>     <dbl>   <dbl>      <dbl>      <dbl> <fct>                      <dbl>
1 3703       31.9   -2.02      24.8        8.24 3                           87.3
2 4034       21.9  -22.0       -7.19     -14.8  3                           87.3
3 4065       13.8  -17.0       35.8      -10.8  1                           81.9
# ℹ abbreviated name: ¹​`as.factor(Intervention_months_factor)`
# ℹ 6 more variables: .resid <dbl>, .hat <dbl>, .sigma <dbl>, .cooksd <dbl>,
#   .std.resid <dbl>, index <int>
Code
# Standardized residuals by row index, coloured by the outcome value.
ggplot(model.data, aes(x = index, y = .std.resid)) +
  geom_point(aes(color = SF_ph), alpha = 0.5) +
  theme_bw()

Code
# List observations whose standardized residual exceeds 4 in magnitude.
# NOTE: a threshold of 4 is used here, unlike the |resid| > 3 used for the
# CFQ models above.
model.data %>%
  filter(.std.resid > 4 | .std.resid < -4)
# A tibble: 20 × 13
   .rownames SF_ph Age[,1] Weight[,1] Height[,1] as.factor(Interventio…¹ .fitted
   <chr>     <dbl>   <dbl>      <dbl>      <dbl> <fct>                     <dbl>
 1 936        43.1   -8.02      9.81     -16.8   2                          86.3
 2 1407       23.8  -15.0     -17.2       -8.76  2                          91.1
 3 1659       28.1  -12.0      26.8        1.24  0                          85.6
 4 2032       34.4  -14.0     -11.2       -7.76  0                          90.0
 5 2111       41.2  -21.0      -5.19       3.24  0                          90.2
 6 2271       48.7  -21.0     -13.2       -7.76  0                          89.8
 7 2998       50.6  -22.0     -26.2      -11.8   4                          90.6
 8 3703       31.9   -2.02     24.8        8.24  3                          87.3
 9 3877       50.0  -19.0      -0.189      9.24  0                          90.4
10 4034       21.9  -22.0      -7.19     -14.8   3                          87.3
11 4065       13.8  -17.0      35.8      -10.8   1                          81.9
12 4595       42.4  -16.0       6.81       1.24  0                          88.4
13 5649       38.7   -1.02     -4.19     -18.8   0                          88.4
14 6172       32.5  -21.0       4.81     -16.8   1                          85.5
15 6286       52.5   -5.02    -10.2       -0.760 2                          91.9
16 6414       45.0  -12.0     -20.2       -9.76  0                          91.3
17 6859       47.5  -16.0     -18.2       -8.76  3                          90.3
18 7026       42.6   -6.02    -13.2       -8.76  0                          90.8
19 7246       44.4  -22.0     -13.2      -15.8   1                          88.4
20 7301       43.2  -19.0     -18.2       -2.76  1                          91.3
# ℹ abbreviated name: ¹​`as.factor(Intervention_months_factor)`
# ℹ 6 more variables: .resid <dbl>, .hat <dbl>, .sigma <dbl>, .cooksd <dbl>,
#   .std.resid <dbl>, index <int>
Code
# Cook's distance for every observation in the pre-menopausal SF_ph model.
cooksd <- cooks.distance(SF_ph_preF)
# Cutoff line at 3 * (k + 1) / n, where k = 7 predictors so k + 1 = 8.
# NOTE(review): n is taken from the full data set, not the subset the model
# was fitted on -- confirm this is intended.
n <- nrow(data_scale)
plot(cooksd, main = "Cooks Distance for Influential Obs")
abline(h = (8/n) * 3, lty = 2, col = "steelblue") # add cutoff line

Code
# Work on a copy so the transformation does not overwrite data_scale;
# the log transform itself is applied in the next chunk.
data_scale2 <- data_scale
# Inspect the raw SF_ph distribution before transforming.
table(data_scale2$SF_ph)

13.8010204081633 15.0510204081633 15.1020408163265            16.25 
               1                1                1                1 
21.9132653061224 23.1122448979592 23.8010204081633 23.8520408163265 
               1                1                1                1 
25.6122448979592 26.8622448979592 27.4744897959184 28.1122448979592 
               1                1                1                1 
29.3622448979592 30.6122448979592 31.1734693877551 31.9132653061224 
               1                1                1                2 
32.4744897959184 32.6020408163265 33.1632653061225 33.8010204081633 
               1                1                1                1 
34.4132653061224 35.6122448979592 35.6632653061224 37.4234693877551 
               1                1                1                1 
38.7244897959184 39.3622448979592 39.9744897959184 40.0255102040816 
               3                2                1                1 
40.0510204081633 40.6122448979592 40.6632653061224 41.1734693877551 
               1                2                1                1 
41.2244897959184 42.4234693877551 42.4744897959184 42.6020408163265 
               4                1                2                1 
43.0867346938775 43.1632653061224 43.7244897959184 44.3367346938775 
               1                1                4                1 
44.4132653061224 44.9744897959184 45.1020408163265 45.6122448979592 
               2                3                1                1 
46.2244897959184 46.2755102040816 46.8367346938775 47.4744897959184 
               1                1                1                3 
48.0357142857143 48.0867346938775 48.6734693877551 48.7244897959184 
               2                2                1                4 
48.7755102040816 49.4132653061224 49.9744897959184 50.5357142857143 
               2                2                2                2 
50.5867346938775 50.6122448979592 51.1734693877551 51.2244897959184 
               3                1                1                2 
51.2755102040816 51.7857142857143 51.8367346938775 52.4744897959184 
               2                1                3                6 
52.5255102040816  52.984693877551 53.0357142857143 53.0867346938775 
               1                1                3                5 
53.1632653061224 53.7244897959184 53.7755102040816 54.3367346938775 
               1                3                1                4 
54.3622448979592 54.3877551020408 54.4642857142857 54.9744897959184 
               2                1                1                1 
55.0255102040816 55.5357142857143 55.5867346938775 55.6122448979592 
               2                2                6                1 
55.6377551020408 55.6632653061224  55.765306122449 56.2244897959184 
               1                2                1                3 
56.2755102040816 56.3010204081633 56.7857142857143 56.8367346938775 
               2                1                3                2 
56.8877551020408 56.9132653061224 57.4234693877551 57.4744897959184 
               1                2                1                5 
58.0357142857143 58.0867346938775 58.1122448979592 58.1377551020408 
               5                6                1                4 
58.6989795918367 58.7244897959184            58.75  59.234693877551 
               1                4                2                1 
59.2857142857143 59.3367346938775 59.3622448979592 59.3877551020408 
               3                4                1                1 
59.8979591836735 59.9744897959184 60.5357142857143 60.5867346938775 
               2                4                2                5 
60.6122448979592 60.6377551020408 61.0969387755102 61.1479591836735 
               1                4                2                1 
61.2244897959184            61.25 61.7857142857143 61.8367346938775 
               3                1                2                8 
61.8622448979592 61.8877551020408 62.4489795918367 62.4744897959184 
               1                3                1                1 
63.0357142857143 63.0867346938775 63.1377551020408 63.1887755102041 
               6                7                3                1 
63.2142857142857 63.6479591836735 63.6734693877551 63.6989795918367 
               1                1                1                2 
63.7244897959184            63.75  64.234693877551 64.2857142857143 
               2                2                1                6 
64.3367346938775 64.3877551020408 64.4132653061224 64.8469387755102 
               3                2                1                1 
64.8979591836735 64.9744897959184               65 65.0255102040816 
               4                3                1                1 
65.5357142857143 65.5867346938775 65.6377551020408 66.1479591836735 
               2                3                1                3 
66.1989795918367 66.2244897959184            66.25 66.2755102040816 
               1                3                2                1 
 66.734693877551 66.7857142857143 66.8367346938775 66.8622448979592 
               1                4                8                1 
66.8877551020408 67.3979591836735 67.4489795918367 67.4744897959184 
               2                5                2                3 
            67.5 68.0357142857143 68.0867346938775 68.1122448979592 
               2                1                9                1 
68.1377551020408 68.5969387755102 68.6479591836735 68.6989795918367 
               2                4                1                7 
68.7244897959184            68.75  69.234693877551 69.3367346938775 
               1                4                1                7 
69.3877551020408 69.4387755102041 69.9489795918367 69.9744897959184 
               2                1                1                5 
70.4336734693878 70.5357142857143 70.5867346938775 70.6377551020408 
               1                2               10                5 
70.6632653061224 71.0969387755102 71.2244897959184            71.25 
               1                1                1                2 
71.2755102040816 71.7857142857143 71.8367346938775 71.9132653061224 
               1                3               10                1 
72.3979591836735 72.4489795918367 72.4744897959184             72.5 
               2                1                2                3 
72.6275510204082  72.984693877551 73.0357142857143 73.0867346938775 
               1                1               10               20 
73.1377551020408 73.1887755102041 73.2142857142857 73.5969387755102 
               7                1                1                1 
73.6479591836735 73.6734693877551 73.6989795918367 73.7244897959184 
               6                1                3                2 
           73.75 73.7755102040816 73.8265306122449  74.234693877551 
               7                2                1                1 
74.2857142857143 74.3367346938775 74.3877551020408 74.8469387755102 
               8               17                6                1 
74.8979591836735 74.9489795918367 74.9744897959184               75 
               2                1                2                6 
75.0255102040816  75.484693877551 75.5357142857143 75.5612244897959 
               1                1               10                1 
75.5867346938775 75.6377551020408 75.6887755102041 75.9948979591837 
              18               12                1                1 
76.0969387755102 76.1479591836735 76.1989795918367 76.2244897959184 
               1                2                7                3 
           76.25 76.2755102040816 76.6836734693878 76.7857142857143 
               8                2                1                5 
76.8367346938775 76.8622448979592 76.8877551020408 76.9387755102041 
              17                1               13                2 
76.9642857142857 77.2959183673469 77.3469387755102 77.3979591836735 
               1                1                2                7 
77.4489795918367 77.4744897959184             77.5 78.0357142857143 
               3                1                9               10 
78.0867346938775 78.1377551020408 78.5969387755102 78.6479591836735 
              33               15                1                6 
78.6734693877551 78.6989795918367 78.7244897959184            78.75 
               1                9                3               15 
78.7755102040816 79.2857142857143 79.3367346938775 79.3877551020408 
               1               14               31               13 
79.8469387755102 79.8979591836735 79.9489795918367 79.9744897959184 
               1                5               10                1 
              80 80.0255102040816 80.0765306122449  80.484693877551 
               9                1                1                1 
80.5357142857143 80.5867346938775 80.6377551020408 80.6632653061224 
              20               56               20                1 
80.6887755102041 81.0969387755102 81.1479591836735 81.1989795918367 
               3                1                9               14 
81.2244897959184            81.25 81.3265306122449  81.734693877551 
               2               16                1                1 
81.7857142857143 81.8367346938775 81.8877551020408 81.9387755102041 
              17               53               24                2 
82.3469387755102 82.3979591836735 82.4489795918367             82.5 
               1               15               20               17 
82.5255102040816  82.984693877551 83.0357142857143 83.0867346938775 
               1                1               11               53 
83.1377551020408 83.1887755102041 83.5969387755102 83.6479591836735 
              28                3                3               23 
83.6989795918367            83.75 83.7755102040816 83.8265306122449 
              19               27                1                1 
83.8775510204082 84.2857142857143 84.3367346938775 84.3877551020408 
               1                6               68               45 
84.4387755102041 84.8469387755102 84.8979591836735 84.9489795918367 
               6                4               42               39 
              85 85.0255102040816  85.484693877551 85.5357142857143 
              36                1                1                5 
85.5867346938775 85.6377551020408 85.6887755102041 86.0969387755102 
              60               47                1                5 
86.1479591836735 86.1989795918367 86.2244897959184            86.25 
              40               64                1               55 
 86.734693877551 86.7857142857143 86.8367346938775 86.8877551020408 
               1                7               39               56 
86.9387755102041 87.3469387755102 87.3979591836735 87.4489795918367 
               6                3               42              103 
            87.5 87.5255102040816 88.0357142857143 88.0867346938775 
              89                1                3               27 
88.1377551020408 88.1887755102041 88.5969387755102 88.6479591836735 
              54                3                1               45 
88.6989795918367            88.75 89.2857142857143 89.3367346938775 
             130              109                2               23 
89.3877551020408 89.4387755102041 89.8469387755102 89.8979591836735 
              50                4                8               45 
89.9489795918367               90 90.5867346938775 90.6377551020408 
             169              207               19               41 
90.6887755102041 91.0969387755102 91.1479591836735 91.1989795918367 
               7                2               43              203 
           91.25 91.3775510204082 91.8367346938775 91.8877551020408 
             268                1                9               36 
91.9387755102041 92.3469387755102 92.3979591836735 92.4489795918367 
               3                1               25              192 
            92.5 93.1377551020408 93.1887755102041 93.6479591836735 
             391               29                3               16 
93.6989795918367            93.75 94.3877551020408 94.4387755102041 
             189              450                6                4 
94.8979591836735 94.9489795918367               95 95.6887755102041 
              13              157              599                3 
96.1989795918367            96.25 97.4489795918367             97.5 
             114              551               48              576 
           98.75              100 
             518              429 
Code
# Log10-transform SF_ph in the working copy in an attempt to normalize the
# residuals. Read from data_scale2 itself rather than data_scale: the values
# are identical (data_scale2 is an untouched copy at this point) but
# self-referencing avoids silently picking up stale data if the copy is
# ever modified first.
data_scale2$SF_ph <- log10(data_scale2$SF_ph)
# Inspect the transformed distribution.
table(data_scale2$SF_ph)

1.13991119808611 1.17756594462169 1.17903563970246 1.21085336531489 
               1                1                1                1 
1.34070709681079 1.36384213065636 1.37659557672604 1.37752554385206 
               1                1                1                1 
1.40844764578854 1.42914230416503 1.43892963627752 1.44889552749531 
               1                1                1                1 
1.46778925660933 1.48589517902717 1.49378513888608 1.50397124267296 
               1                1                1                2 
 1.5115423366332 1.51324478680192 1.52065728528638 1.52892981125237 
               1                1                1                1 
1.53672588265145 1.55159935126669 1.55222110438921 1.57314404682283 
               1                1                1                1 
  1.587985704539 1.59507985904269 1.60178292944813 1.60233687656648 
               3                2                1                1 
1.60261358538878 1.60865699638119 1.60920225003964 1.61461746336559 
               1                2                1                1 
1.61515528941811 1.62760618219906 1.62812817082188 1.62943040412713 
               4                1                2                1 
1.63434358255055 1.63511429168255 1.64072475056672 1.64676370509219 
               1                1                4                1 
1.64751270409687 1.65296624527886  1.6541961936566 1.65908144743944 
               2                3                1                1 
1.66487212632034 1.66535121570362 1.67058660984477 1.67646030611031 
               1                1                1                3 
1.68156425299621 1.68202528752135 1.68729230334762 1.68774730022727 
               2                2                1                4 
1.68820182091962 1.69384355369865 1.69874836897428 1.70359840851809 
               2                2                2                2 
1.70403664718485  1.7042556007977 1.70904486166394 1.70947764145252 
               3                1                1                2 
1.70990999040003 1.71420997089276 1.71463763659142 1.71994822467427 
               2                1                3                6 
1.72037027959757 1.72415042951464 1.72456842231101 1.72498601319117 
               1                1                3                5 
1.72561164760703 1.73017229982901 1.73058453952005 1.73509353641828 
               1                3                1                4 
1.73529738269374 1.73550113333408 1.73611181234059 1.74016120747629 
               2                1                1                1 
1.74056407808209 1.74457236202064  1.7449711632258 1.74517042658415 
               2                2                6                1 
1.74536959855824 1.74556867923187 1.74636409059323 1.74992552315929 
               1                2                1                3 
1.75031944108371  1.7505162661412 1.75423909297823 1.75462911948123 
               2                1                3                2 
 1.7550187960277 1.75521350326338 1.75908942798006 1.75947512470337 
               1                2                1                5 
1.76369533397267 1.76407696359469 1.76426765272262 1.76445825815992 
               5                6                1                4 
1.76863055164819 1.76881925227332 1.76900787094377  1.7725761483821 
               1                4                2                1 
1.77295005669784 1.77332364337197 1.77351031626627 1.77369690895739 
               3                4                1                1 
1.77741202555512 1.77796656210448 1.78201167119688 1.78237754694043 
               2                4                2                5 
 1.7825603692887 1.78274311470772 1.78601945073012  1.7863819670132 
               1                4                2                1 
1.78692517469115 1.78710609303657 1.79088807178658 1.79124654847379 
               3                1                2                8 
1.79142567591783  1.7916047295101 1.79552534645307 1.79570271810426 
               1                3                1                1 
1.79958667838162 1.79993804934084 1.80028913624913 1.80063993956538 
               6                7                3                1 
1.80081523501959 1.80378448293895 1.80395851398993 1.80413247533089 
               1                1                1                2 
1.80430636701766 1.80448018910599 1.80776965875139 1.80811447376109 
               2                2                1                6 
1.80845901521661 1.80880328355164 1.80897531543422 1.81188947919753 
               3                2                1                1 
1.81223103995592 1.81274287794316 1.81291335664286 1.81308376844881 
               4                3                1                1 
1.81647803724589  1.8168160096224 1.81715371918989 1.82051644974889 
               2                3                1                3 
1.82085129516402 1.82101862110787 1.82118588260885 1.82135307971655 
               1                3                2                1 
1.82435167263177 1.82468357519428 1.82501522429929 1.82518095392614 
               1                4                8                1 
1.82534662033361 1.82864674625805 1.82897538379315 1.82913960935075 
               2                5                2                3 
1.82930377283102 1.83273694866942 1.83306250676705 1.83322519434412 
               2                1                9                1 
1.83338782100092 1.83630473520284  1.8366276307433 1.83695028639105 
               2                4                1                7 
1.83711152436651  1.8372727025023 1.84032377630326 1.84096338537602 
               1                4                1                7 
1.84128283701374 1.84160205384686 1.84478138343304 1.84493974058407 
               2                1                1                5 
1.84778033961881 1.84840906862026 1.84872309212049 1.84903688872512 
               1                2               10                5 
1.84919370204399 1.85185090169285 1.85262934693067 1.85278486868055 
               1                1                1                2 
1.85294033475771 1.85603802607827 1.85634658344962 1.85680900885115 
               1                3               10                1 
  1.859726324101 1.86003227302658 1.86018516670248 1.86033800657099 
               2                1                2                3 
 1.8611014001265 1.86323179078481 1.86353528100114 1.86383855928295 
               1                1               10               20 
1.86414162592603 1.86444448122554 1.86459582971354 1.86685975047129 
               7                1                1                1 
1.86716071686026 1.86731112187714 1.86746147482374 1.86761177573609 
               6                1                3                2 
 1.8677620246502 1.86791222160204 1.86821245976256 1.87060692196545 
               7                2                1                1 
1.87090530362054 1.87120348041351 1.87150145262548 1.87417404248681 
               8               17                6                1 
1.87446998422358 1.87476572443378 1.87491351905216  1.8750612633917 
               2                1                2                6 
1.87520895748661 1.87785889814018 1.87815234036884 1.87829898716473 
               1                1               10                1 
1.87844558445959 1.87873863067982 1.87903147929638 1.88078443619459 
              18               12                1                1 
1.88136718634161 1.88165826844493 1.88194915558367  1.8820945261229 
               1                2                7                3 
1.88223984801882 1.88238512130397 1.88470290923043 1.88528042857339 
               8                2                1                5 
1.88556890050821  1.8857130646529 1.88585718095816 1.88614527017728 
              17                1               13                2 
1.88628924315453 1.88815656148185 1.88844312993956 1.88872950943025 
               1                1                2                7 
1.88901570020299 1.88915872489781 1.88930170250631 1.89229340996422 
               3                1                9               10 
1.89257726257688 1.89286092978612 1.89540563129648 1.89568745770605 
              33               15                1                6 
1.89582830235846  1.8959691013488 1.89610985470667 1.89625056246164 
               1                9                3               15 
1.89639122464324 1.89919494310842 1.89947432200638 1.89975352129719 
               1               14               31               13 
1.90225827052599 1.90253568636545 1.90281292511211 1.90295147814628 
               1                5               10                1 
1.90308998699194 1.90322845167729 1.90350524867959  1.9057132965597 
               9                1                1                1 
1.90598851487176 1.90626355888469 1.90653842881912 1.90667579857573 
              20               56               20                1 
1.90681312489527 1.90900446089432 1.90927760208691 1.90955057160055 
               3                1                9               14 
1.90968699204517 1.90982336965091 1.91023224570362 1.91240644039174 
               2               16                1                1 
1.91267745099767 1.91294829259167 1.91321896538441 1.91348946958619 
              17               53               24                2 
1.91564745902958 1.91591645531065 1.91618528508209 1.91645394854993 
               1               15               20               17 
1.91658821798426  1.9189979962614 1.91926492588375 1.91953169154442 
               1                1               11               53 
1.91979829344469  1.9200647317855 1.92219037436192 1.92245534964891 
              28                3                3               23 
1.92272016336559 1.92298481570888 1.92311708142695 1.92338149207859 
              19               27                1                1 
1.92364574184756 1.92575397162789 1.92601678221497 1.92627943386005 
               1                6               68               45 
1.92654192675526 1.92863617786304 1.92889725059823 1.92915816648586 
               6                4               42               39 
1.92941892571429 1.92954924664007 1.93188836081481 1.93214748640836 
              36                1                1                5 
1.93240645748455 1.93266527422756 1.93292393682121 1.93498771014659 
              60               47                1                5 
1.93524499361495 1.93550212475444  1.9356306332572 1.93575910374531 
              40               64                1               55 
 1.9381928500218 1.93844824225609 1.93870348439209 1.93895857660613 
               1                7               39               56 
1.93921351907421 1.94124768898466 1.94150129160903  1.9417547462307 
               6                3               42              103 
1.94200805302231 1.94213465103572 1.94465889227103   1.944910511329 
              89                1                3               27 
1.94516198468976 1.94541331252195 1.94741871629031 1.94766874190568 
              54                3                1               45 
 1.9479186236628 1.94816836172713 1.95078197732982 1.95103007472697 
             130              109                2               23 
1.95127803047559 1.95152584473732  1.9535032846108 1.95374983271955 
              50                4                8               45 
1.95399624094285 1.95424250943932 1.95706460528116 1.95730914046887 
             169              207               19               41 
1.95755353804533 1.95950378317232 1.95974694918198 1.95998997911664 
               7                2               43              203 
1.96023287312851 1.96083951449256 1.96301643374683 1.96325764146306 
             268                1                9               36 
1.96349871528657 1.96542250351271 1.96566237895758 1.96590212198432 
               3                1               25              192 
1.96614173273903 1.96912576592927 1.96936360519146 1.97149831748353 
             391               29                3               16 
1.97173486132484 1.97197127639976 1.97491565704654 1.97515034739643 
             189              450                6                4 
1.97725687286144 1.97749030177429 1.97772360528885 1.98086099713027 
              13              157              599                3 
1.98317046538516 1.98340073818054 1.98877729589125 1.98900461569854 
             114              551               48              576 
 1.9945371042985                2 
             518              429 
Code
# Refit the pre-menopausal-female physical-health model on the
# log10-transformed outcome (data_scale2) to try to pass normality.
SF_ph_preF_factor_fix <- lm(SF_ph ~ Age + Weight + Height + as.factor(Intervention_months_factor), 
                     data = data_scale2, 
                     subset = data_scale2$Gender == "Female" & data_scale2$PostMeno == 0)

# QQ plot of residuals against the normal quantiles.
qqnorm(resid(SF_ph_preF_factor_fix), col = "grey")
qqline(resid(SF_ph_preF_factor_fix), col = "dodgerblue", lwd = 2)

Code
shapiro.test(resid(SF_ph_preF_factor_fix))

    Shapiro-Wilk normality test

data:  resid(SF_ph_preF_factor_fix)
W = 0.67074, p-value < 2.2e-16
Code
## Transforming does not seem to help. Many outliers. Solution -> robust regression, see chunk below.

# Multicollinearity check on the original (untransformed) model SF_ph_preF.
car::vif(SF_ph_preF) # no VIF value that exceeds 5 or 10 
                                          GVIF Df GVIF^(1/(2*Df))
Age                                   1.065418  1        1.032191
Weight                                1.206128  1        1.098239
Height                                1.140722  1        1.068046
as.factor(Intervention_months_factor) 1.004341  4        1.000542

Postmenopausal females:

Code
### LINEARITY (Linearity and constant variance assumptions)
# Fitted-versus-residuals plot: linearity bad, constant variance looks bad (heteroscedasticity)
plot(SF_ph_postF, which = 1)

Code
### HOMOGENEITY OF VARIANCE
# Display scale-location plot, also known as the spread-location plot
plot(SF_ph_postF, which = 3) # Linearity bad, constant variance bad

Code
# Formal heteroscedasticity test (Breusch-Pagan, lmtest::bptest)
bptest(SF_ph_postF) #  reject the null of homoscedasticity. The constant variance assumption is violated.

    studentized Breusch-Pagan test

data:  SF_ph_postF
BP = 24.922, df = 7, p-value = 0.0007833
Code
### NORMALITY OF RESIDUALS
# QQ plot of residuals
plot(SF_ph_postF, 2) #suspect

Code
# Formal normality test (Shapiro-Wilk)
shapiro.test(resid(SF_ph_postF)) #reject for the data sampled from normal

    Shapiro-Wilk normality test

data:  resid(SF_ph_postF)
W = 0.80407, p-value < 2.2e-16
Code
### OUTLIERS
# Cook's distance plot, labelling the 3 largest values
plot(SF_ph_postF, which = 4, id.n = 3)

Code
# Pull the three observations with the largest Cook's distance for inspection.
model.data <- SF_ph_postF %>%
  augment() %>%
  mutate(index = seq_len(n()))
model.data %>%
  top_n(3, .cooksd) # standardized residuals above 3 deserve closer attention
# A tibble: 3 × 13
  .rownames SF_ph Age[,1] Weight[,1] Height[,1] as.factor(Intervention…¹ .fitted
  <chr>     <dbl>   <dbl>      <dbl>      <dbl> <fct>                      <dbl>
1 4246       16.2   14.0       11.8        7.24 4                           87.0
2 4440       29.4    4.98       8.81       4.24 4                           87.7
3 6837       32.6    6.98     -14.2      -13.8  3                           88.2
# ℹ abbreviated name: ¹​`as.factor(Intervention_months_factor)`
# ℹ 6 more variables: .resid <dbl>, .hat <dbl>, .sigma <dbl>, .cooksd <dbl>,
#   .std.resid <dbl>, index <int>
Code
# Standardized residuals by observation index, coloured by the outcome value.
ggplot(model.data, aes(x = index, y = .std.resid)) +
  geom_point(aes(colour = SF_ph), alpha = 0.5) +
  theme_bw()

Code
# List observations whose absolute standardized residual exceeds 4.
filter(model.data, abs(.std.resid) > 4) # potential influential points
# A tibble: 14 × 13
   .rownames SF_ph Age[,1] Weight[,1] Height[,1] as.factor(Interventio…¹ .fitted
   <chr>     <dbl>   <dbl>      <dbl>      <dbl> <fct>                     <dbl>
 1 289        33.2    2.98      12.8       1.24  2                          84.6
 2 1423       40.7    4.98     -13.2      -6.76  2                          89.1
 3 2400       41.2   23.0       -8.19     -0.760 0                          89.2
 4 2408       27.5   17.0        6.81     -1.76  0                          86.5
 5 2658       38.7   28.0       -9.19    -19.8   0                          88.4
 6 3087       38.7   12.0       -7.19     -6.76  2                          87.8
 7 3814       23.9    6.98       9.81     -2.76  0                          86.1
 8 4246       16.2   14.0       11.8       7.24  4                          87.0
 9 4440       29.4    4.98       8.81      4.24  4                          87.7
10 5472       35.6   11.0       14.8       7.24  0                          85.6
11 5626       42.5    6.98      -1.19     -2.76  0                          88.2
12 5887       40.0    9.98      17.8       2.24  1                          85.2
13 6837       32.6    6.98     -14.2     -13.8   3                          88.2
14 6898       41.2   15.0       -8.19    -10.8   3                          87.0
# ℹ abbreviated name: ¹​`as.factor(Intervention_months_factor)`
# ℹ 6 more variables: .resid <dbl>, .hat <dbl>, .sigma <dbl>, .cooksd <dbl>,
#   .std.resid <dbl>, index <int>
Code
# Cook's distance with a horizontal cutoff at 3 * (k + 1) / n
# (k + 1 = 8 model parameters here).
# NOTE(review): n is nrow(data_scale), the full data set, not the size of
# the modelled subset — confirm this is the intended denominator.
cooksd <- cooks.distance(SF_ph_postF)
n <- nrow(data_scale)
plot(cooksd, main = "Cooks Distance for Influential Obs")
abline(h = 3 * (8 / n), lty = 2, col = "steelblue") # cutoff line

Code
# Refit the post-menopausal-female model on the log10-transformed outcome
# (data_scale2) to try to pass normality.
SF_ph_postF_factor_fix <- lm(SF_ph ~ Age + Weight + Height + as.factor(Intervention_months_factor), 
                     data = data_scale2, 
                     subset = data_scale2$Gender == "Female" & data_scale2$PostMeno == 1)

# QQ plot of residuals against the normal quantiles.
qqnorm(resid(SF_ph_postF_factor_fix), col = "grey")
qqline(resid(SF_ph_postF_factor_fix), col = "dodgerblue", lwd = 2)

Code
shapiro.test(resid(SF_ph_postF_factor_fix))

    Shapiro-Wilk normality test

data:  resid(SF_ph_postF_factor_fix)
W = 0.68121, p-value < 2.2e-16
Code
## Transforming does not seem to help. Many outliers. Solution -> robust regression, see chunk below.

# Multicollinearity check on the original (untransformed) model SF_ph_postF.
car::vif(SF_ph_postF) # no VIF value that exceeds 5 or 10 
                                          GVIF Df GVIF^(1/(2*Df))
Age                                   1.033504  1        1.016614
Weight                                1.150857  1        1.072780
Height                                1.172847  1        1.082981
as.factor(Intervention_months_factor) 1.012156  4        1.001511

Males:

Code
### LINEARITY (Linearity and constant variance assumptions)
# Fitted-versus-residuals plot: bad
plot(SF_ph_M, which = 1)      

Code
### HOMOGENEITY OF VARIANCE
# Display scale-location plot, also known as the spread-location plot
plot(SF_ph_M, which = 3) # suspect

Code
# Formal heteroscedasticity test (Breusch-Pagan, lmtest::bptest)
bptest(SF_ph_M)# small p-value, so we reject the null of homoscedasticity. The constant variance assumption is violated.

    studentized Breusch-Pagan test

data:  SF_ph_M
BP = 33.594, df = 7, p-value = 2.052e-05
Code
### NORMALITY OF RESIDUALS
# QQ plot of residuals
plot(SF_ph_M, 2) # suspect

Code
# Formal normality test (Shapiro-Wilk)
shapiro.test(resid(SF_ph_M)) # reject for the data sampled from normal

    Shapiro-Wilk normality test

data:  resid(SF_ph_M)
W = 0.77114, p-value < 2.2e-16
Code
### OUTLIERS
# Cook's distance plot, labelling the 3 largest values
plot(SF_ph_M, which = 4, id.n = 3)

Code
# Pull the three observations with the largest Cook's distance for inspection.
model.data <- SF_ph_M %>%
  augment() %>%
  mutate(index = seq_len(n()))
model.data %>%
  top_n(3, .cooksd) # standardized residuals above 3 deserve closer attention
# A tibble: 3 × 13
  .rownames SF_ph Age[,1] Weight[,1] Height[,1] as.factor(Intervention…¹ .fitted
  <chr>     <dbl>   <dbl>      <dbl>      <dbl> <fct>                      <dbl>
1 1657       15.1    20.0       1.81      -4.76 0                           89.9
2 3057       15.1    13.0      71.8        2.24 4                           83.0
3 3062       23.1    19.0      23.8      -11.8  2                           86.0
# ℹ abbreviated name: ¹​`as.factor(Intervention_months_factor)`
# ℹ 6 more variables: .resid <dbl>, .hat <dbl>, .sigma <dbl>, .cooksd <dbl>,
#   .std.resid <dbl>, index <int>
Code
# Standardized residuals by observation index, coloured by the outcome value.
ggplot(model.data, aes(x = index, y = .std.resid)) +
  geom_point(aes(colour = SF_ph), alpha = 0.5) +
  theme_bw()

Code
# List observations whose absolute standardized residual exceeds 3.
filter(model.data, abs(.std.resid) > 3) # potential influential points
# A tibble: 84 × 13
   .rownames SF_ph Age[,1] Weight[,1] Height[,1] as.factor(Interventio…¹ .fitted
   <chr>     <dbl>   <dbl>      <dbl>      <dbl> <fct>                     <dbl>
 1 407        56.8    25.0      -3.19       3.24 2                          90.7
 2 437        40.6   -14.0      -3.19      12.2  2                          92.9
 3 440        49.4    13.0      -7.19      -3.76 2                          90.7
 4 581        63.1   -12.0      -7.19       1.24 0                          92.6
 5 635        47.5    20.0       9.81       1.24 0                          89.7
 6 662        40.0    24.0      16.8        1.24 0                          88.7
 7 755        26.9   -20.0       7.81       6.24 0                          91.7
 8 759        44.4    12.0      -6.19       3.24 0                          91.9
 9 823        58.0    16.0      46.8        7.24 0                          86.3
10 825        59.3    27.0      -8.19      -9.76 0                          90.2
# ℹ 74 more rows
# ℹ abbreviated name: ¹​`as.factor(Intervention_months_factor)`
# ℹ 6 more variables: .resid <dbl>, .hat <dbl>, .sigma <dbl>, .cooksd <dbl>,
#   .std.resid <dbl>, index <int>
Code
# Cook's distance with a horizontal cutoff at 3 * (k + 1) / n
# (k + 1 = 8 model parameters here).
# NOTE(review): n is nrow(data_scale), the full data set, not the size of
# the modelled subset — confirm this is the intended denominator.
cooksd <- cooks.distance(SF_ph_M)
n <- nrow(data_scale)
plot(cooksd, main = "Cooks Distance for Influential Obs")
abline(h = 3 * (8 / n), lty = 2, col = "steelblue") # cutoff line

Code
# Refit the MALE model on the log10-transformed outcome (data_scale2) to try
# to pass normality.
# BUG FIX: the subset previously read `Gender == "Female" & PostMeno == 1`,
# copied from the post-menopausal chunk, so this "male" model duplicated the
# post-menopausal fit (note the identical Shapiro-Wilk W = 0.68121 in the
# rendered output). Re-render to refresh the downstream output.
SF_ph_Mfactor_fix <- lm(SF_ph ~ Age + Weight + Height + as.factor(Intervention_months_factor), 
                     data = data_scale2, 
                     subset = data_scale2$Gender == "Male")

# QQ plot of residuals against the normal quantiles.
qqnorm(resid(SF_ph_Mfactor_fix), col = "grey")
qqline(resid(SF_ph_Mfactor_fix), col = "dodgerblue", lwd = 2)

Code
shapiro.test(resid(SF_ph_Mfactor_fix))

    Shapiro-Wilk normality test

data:  resid(SF_ph_Mfactor_fix)
W = 0.68121, p-value < 2.2e-16
Code
## Transforming does not seem to help. Many outliers. Solution -> robust regression, see chunk below.

# Multicollinearity check on the original (untransformed) model SF_ph_M.
car::vif(SF_ph_M) # no VIF value that exceeds 5 or 10 
                                          GVIF Df GVIF^(1/(2*Df))
Age                                   1.092442  1        1.045200
Weight                                1.274162  1        1.128788
Height                                1.251161  1        1.118553
as.factor(Intervention_months_factor) 1.007564  4        1.000942
Decision

We conclude that the assumptions were violated and thus we use robust models.

Robust models

Code
# Robust regressions (robustbase::lmrob) for physical health, fitted
# separately per stratum, since the OLS assumption checks above failed
# (non-normal residuals, heteroscedasticity, influential outliers).
SF_ph_preF_robust <- lmrob(SF_ph ~ Age + Weight + Height + as.factor(Intervention_months_factor), data = data_scale, subset  = data_scale$Gender == "Female" & data_scale$PostMeno == 0)
SF_ph_postF_robust <- lmrob(SF_ph ~ Age + Weight + Height + as.factor(Intervention_months_factor), data = data_scale,  subset = data_scale$Gender == "Female" & data_scale$PostMeno == 1)
SF_ph_Mrobust <- lmrob(SF_ph ~ Age + Weight + Height + as.factor(Intervention_months_factor), data = data_scale, subset = data_scale$Gender == "Male")

# Readable labels for the intervention-duration dummies in the output table.
pl <- c(
  `(Intercept)` = "Intercept",
  `as.factor(Intervention_months_factor)1` = "6-12 months",
  `as.factor(Intervention_months_factor)2` = "12-18 months",
  `as.factor(Intervention_months_factor)3` = "18-24 months",
  `as.factor(Intervention_months_factor)4` = "24+ months"
  
)

# Combined regression table for the three robust models (sjPlot::tab_model).
tab_model(SF_ph_preF_robust, SF_ph_postF_robust, SF_ph_Mrobust, pred.labels = pl, dv.labels = c("Pre-menopausal women", "Post-menopausal women", "Men"), show.reflvl = T, title = "Physical health (Sf-36), higher score indicates better health (robust regression)", digits = 3)
Physical health (Sf-36), higher score indicates better health (robust regression)
  Pre-menopausal women Post-menopausal women Men
Predictors Estimates CI p Estimates CI p Estimates CI p
Intercept 92.473 91.820 – 93.126 <0.001 91.859 90.882 – 92.836 <0.001 93.180 92.842 – 93.519 <0.001
Age 0.056 0.021 – 0.090 0.001 -0.053 -0.106 – -0.000 0.049 -0.026 -0.038 – -0.013 <0.001
Weight -0.098 -0.125 – -0.071 <0.001 -0.107 -0.140 – -0.073 <0.001 -0.067 -0.084 – -0.049 <0.001
Height 0.067 0.023 – 0.111 0.003 0.036 -0.023 – 0.095 0.228 0.056 0.026 – 0.086 <0.001
6-12 months -0.042 -0.760 – 0.676 0.909 -0.331 -1.320 – 0.659 0.512 -0.425 -0.975 – 0.124 0.129
12-18 months 0.324 -0.391 – 1.038 0.375 -1.117 -2.143 – -0.092 0.033 -0.466 -1.001 – 0.069 0.088
18-24 months 0.106 -0.788 – 1.001 0.816 -1.014 -2.572 – 0.543 0.201 -0.352 -1.069 – 0.365 0.336
24+ months -0.640 -1.471 – 0.192 0.131 0.777 -0.429 – 1.983 0.207 -0.119 -0.640 – 0.402 0.653
Observations 2398 1468 3582
R2 / R2 adjusted 0.032 / 0.029 0.040 / 0.035 0.032 / 0.030

Mental health

Coded using https://meetinstrumentenzorg.nl/wp-content/uploads/instrumenten/SF-36-RAND-36-form.pdf

Predict by ferritin

Code
# Pearson correlation between mental health (SF-36) and ferritin.
# NOTE(review): cor.test() drops incomplete pairs itself; the `use` argument
# is not a cor.test() parameter and is silently ignored.
res <- cor.test(data_scale$SF_mh, data_scale$Ferritin,  method = "pearson", use = "complete.obs")
# Scatterplot annotated with the correlation estimate and p-value.
# BUG FIX: x was previously mapped to SF_ph (physical health) although the
# correlation test, the axis label, and this section all concern SF_mh.
ggplot(data_scale, mapping = aes(x = SF_mh, y = Ferritin)) + 
  geom_point() + 
  theme_bw() +
  geom_text(x = 75, y = 1000, label=paste('r(',res$parameter,') = ', round(res$estimate,3),', p = ', round(res$p.value, 3), sep = '')) +
  labs(x="Mental health (SF-36), higher is better", y="Ferritin")

Code
# Boxplot of mental health by ferritin cut-off groups (<15, 15-30, >30).
# NOTE(review): Fergroup is already derived identically in the data-cleaning
# step; this recomputation is redundant but harmless.
data_scale <- data_scale %>% mutate(Fergroup = factor(case_when(Ferritin < 15 ~ 1, Ferritin >= 15 & Ferritin <= 30~ 2, Ferritin > 30 ~ 3)) %>%  fct_recode("Ferritin < 15" = "1", "Ferritin 15-30" = "2", "Ferritin > 30" = "3"))

ggplot(data_scale, aes(x=Fergroup, y=SF_mh)) +
  geom_boxplot() + 
  theme_bw() +
  labs(x="Ferritin cut-off groups", y="Mental health (SF-36), higher is better")

Models

Code
# OLS models for mental health (SF-36), fitted separately per stratum:
# pre-menopausal women, post-menopausal women, men. Covariates are centred
# age, weight, height plus intervention exposure as a factor.
SF_mh_preF<- lm(SF_mh ~ Age + Weight + Height + as.factor(Intervention_months_factor) , data = data_scale, subset  = data_scale$Gender == "Female" & data_scale$PostMeno == 0)
SF_mh_postF <- lm(SF_mh ~ Age + Weight + Height + as.factor(Intervention_months_factor) , data = data_scale, subset = data_scale$Gender == "Female" & data_scale$PostMeno == 1)
SF_mh_M <- lm(SF_mh ~ Age + Weight + Height + as.factor(Intervention_months_factor), data = data_scale, subset = data_scale$Gender == "Male")

# Readable labels for the intervention-duration dummies in the output table.
pl <- c(
  `(Intercept)` = "Intercept",
  `as.factor(Intervention_months_factor)1` = "6-12 months",
  `as.factor(Intervention_months_factor)2` = "12-18 months",
  `as.factor(Intervention_months_factor)3` = "18-24 months",
  `as.factor(Intervention_months_factor)4` = "24+ months"
  
)


# Combined regression table for the three models (sjPlot::tab_model).
tab_model(SF_mh_preF, SF_mh_postF, SF_mh_M, pred.labels = pl, dv.labels = c("Pre-menopausal women", "Post-menopausal women", "Men"), title = "Mental health (Sf-36), higher score indicates better health", show.reflvl = T, digits=3)
Mental health (Sf-36), higher score indicates better health
  Pre-menopausal women Post-menopausal women Men
Predictors Estimates CI p Estimates CI p Estimates CI p
Intercept 80.147 78.628 – 81.666 <0.001 77.548 75.935 – 79.161 <0.001 81.271 80.582 – 81.961 <0.001
Age 0.266 0.185 – 0.348 <0.001 0.268 0.176 – 0.361 <0.001 0.167 0.140 – 0.194 <0.001
Weight -0.054 -0.109 – 0.001 0.054 -0.087 -0.144 – -0.031 0.003 -0.026 -0.061 – 0.009 0.142
Height 0.094 -0.007 – 0.194 0.068 0.102 -0.002 – 0.205 0.055 0.075 0.013 – 0.136 0.017
6-12 months -1.196 -2.917 – 0.525 0.173 0.540 -1.203 – 2.283 0.543 -0.525 -1.694 – 0.644 0.378
12-18 months 1.321 -0.375 – 3.017 0.127 0.315 -1.330 – 1.960 0.707 -0.683 -1.799 – 0.433 0.230
18-24 months -2.222 -4.358 – -0.087 0.041 -0.597 -3.168 – 1.974 0.649 -0.839 -2.392 – 0.713 0.289
24+ months -0.744 -2.590 – 1.102 0.429 0.322 -1.804 – 2.449 0.766 -0.556 -1.750 – 0.638 0.361
Observations 2404 1476 3595
R2 / R2 adjusted 0.023 / 0.020 0.028 / 0.024 0.042 / 0.040

Assumptions

Premenopausal females:

Code
### LINEARITY (Linearity and constant variance assumptions)
# Fitted-versus-residuals plot: linearity bad, constant variance questionable (heteroscedasticity)
plot(SF_mh_preF,  which = 1)

Code
### HOMOGENEITY OF VARIANCE
# Display scale-location plot (also known as spread-location plot)
plot(SF_mh_preF, which = 3) # Linearity bad, constant variance questionable

Code
# Formal heteroscedasticity test (Breusch-Pagan, lmtest::bptest)
bptest(SF_mh_preF) # reject the null of homoscedasticity. The constant variance assumption is  violated.

    studentized Breusch-Pagan test

data:  SF_mh_preF
BP = 26.608, df = 7, p-value = 0.0003921
Code
### NORMALITY OF RESIDUALS
# QQ plot of residuals
plot(SF_mh_preF , 2) #suspect

Code
# Formal normality test (Shapiro-Wilk)
shapiro.test(resid(SF_mh_preF)) #reject  the data sampled from normal

    Shapiro-Wilk normality test

data:  resid(SF_mh_preF)
W = 0.82854, p-value < 2.2e-16
Code
### OUTLIERS
# Cook's distance plot, labelling the 3 largest values
plot(SF_mh_preF,  which = 4, id.n = 3)

Code
# Pull the three observations with the largest Cook's distance for inspection.
model.data <- SF_mh_preF %>%
  augment() %>%
  mutate(index = seq_len(n()))
model.data %>%
  top_n(3, .cooksd) # standardized residuals above 3 deserve closer attention
# A tibble: 3 × 13
  .rownames SF_mh Age[,1] Weight[,1] Height[,1] as.factor(Intervention…¹ .fitted
  <chr>     <dbl>   <dbl>      <dbl>      <dbl> <fct>                      <dbl>
1 3925       18.2   -2.02       6.81      7.24  3                           77.7
2 4065       11.2  -17.0       35.8     -10.8   1                           71.5
3 7349       33.4  -13.0       49.8      -0.760 1                           72.7
# ℹ abbreviated name: ¹​`as.factor(Intervention_months_factor)`
# ℹ 6 more variables: .resid <dbl>, .hat <dbl>, .sigma <dbl>, .cooksd <dbl>,
#   .std.resid <dbl>, index <int>
Code
# Standardized residuals by observation index, coloured by the outcome value.
# BUG FIX: `almha` was a typo for `alpha`; ggplot2 warns "Ignoring unknown
# parameters" and drew the points fully opaque.
ggplot(model.data, aes(index, .std.resid)) + 
  geom_point(aes(color = SF_mh), alpha = .5) +
  theme_bw()

Code
# List observations whose absolute standardized residual exceeds 4.
filter(model.data, abs(.std.resid) > 4) # potential influential points
# A tibble: 5 × 13
  .rownames SF_mh Age[,1] Weight[,1] Height[,1] as.factor(Intervention…¹ .fitted
  <chr>     <dbl>   <dbl>      <dbl>      <dbl> <fct>                      <dbl>
1 1659      15.6    -12.0      26.8        1.24 0                           75.6
2 2029      11.9    -23.0     -13.2       -1.76 0                           74.6
3 4065      11.2    -17.0      35.8      -10.8  1                           71.5
4 4595      14.9    -16.0       6.81       1.24 0                           75.6
5 6838       8.38   -20.0     -11.2       -1.76 3                           73.0
# ℹ abbreviated name: ¹​`as.factor(Intervention_months_factor)`
# ℹ 6 more variables: .resid <dbl>, .hat <dbl>, .sigma <dbl>, .cooksd <dbl>,
#   .std.resid <dbl>, index <int>
Code
# Cook's distance for every observation in the fitted model
cooksd = cooks.distance(SF_mh_preF)
# Plot Cook's distance with a horizontal cutoff line at 3 * (k + 1) / n
# (Stevens, 2002), where k is the number of predictors.
# NOTE(review): the literal 8 below hard-codes k + 1, i.e. assumes the
# model has 7 predictors — confirm against the formula of SF_mh_preF.
# NOTE(review): n is taken from all rows of data_scale; if the model
# dropped rows with missing values, nobs(SF_mh_preF) may be the intended n.
n <- nrow(data_scale)
plot(cooksd, main = "Cooks Distance for Influential Obs")
abline(h = (8/n) * 3, lty = 2, col = "steelblue") # add cutoff line

Code
# Log transform SF_mh so the residuals pass the normality check above.
# Work on a copy so the untransformed scores remain available in data_scale.
data_scale2 <- data_scale
# Frequency table of the raw SF_mh scores before transformation
table(data_scale2$SF_mh)

               3            8.375            11.25           11.875 
               1                1                1                1 
            12.5            13.75           14.875            15.25 
               1                1                1                1 
          15.625            16.25           16.875           17.125 
               1                1                1                2 
           17.25             17.5            18.25           18.375 
               2                1                1                1 
           19.25             19.5               20             20.5 
               2                1                1                2 
          20.625           21.125            22.25 22.3333333333333 
               1                1                1                1 
          22.625               23           24.125            24.25 
               3                1                1                1 
            24.5           24.625            24.75           24.875 
               1                1                1                1 
            25.5           25.625            25.75           25.875 
               2                1                3                1 
25.9583333333333            26.25           26.375             26.5 
               1                2                2                2 
           26.75           27.125            27.25           27.625 
               1                2                2                2 
           27.75               28 28.2083333333333            28.25 
               2                2                1                1 
28.3333333333333           28.375             28.5           28.625 
               1                1                2                2 
           29.25           29.625 29.7083333333333 29.9583333333333 
               1                2                1                2 
          30.125 30.2083333333333            30.25             30.5 
               2                1                1                2 
           30.75               31           31.125            31.25 
               1                2                1                2 
            31.5           31.625           31.875           32.125 
               3                1                4                3 
           32.25 32.3333333333333           32.375             32.5 
               2                1                1                1 
           32.75           32.875           33.125           33.375 
               3                1                2                2 
            33.5 33.5833333333333           33.625 33.7083333333333 
               3                1                2                1 
           33.75           33.875 33.9583333333333               34 
               4                6                1                1 
          34.125            34.25           34.375             34.5 
               1                1                5                1 
          34.625 34.7083333333333            34.75           34.875 
               2                2                1                1 
              35           35.125 35.2083333333333            35.25 
               3                4                1                2 
35.3333333333333           35.375             35.5           35.625 
               1                2                2                1 
           35.75 36.0833333333333           36.125 36.2083333333333 
               1                1                2                1 
           36.25           36.375           36.625            36.75 
               2                1                7                1 
          36.875 36.9583333333333 37.0833333333333           37.125 
               2                1                1                1 
37.4583333333333             37.5           37.625           37.875 
               1                1                4                1 
37.9583333333333            38.25           38.375 38.4583333333333 
               1                1                2                4 
            38.5 38.5833333333333           38.625            38.75 
               2                1                1                1 
38.8333333333333           38.875 38.9583333333333               39 
               1                2                1                1 
          39.125 39.2083333333333            39.25           39.375 
               1                1                1                1 
            39.5 39.5833333333333           39.625            39.75 
               2                1                2                2 
          39.875 40.0833333333333           40.125 40.2083333333333 
               1                1                1                1 
40.3333333333333           40.375             40.5 40.5833333333333 
               2                1                1                1 
          40.625 40.7083333333333            40.75           40.875 
               1                1                3                2 
              41           41.125 41.1666666666667 41.2083333333333 
               2                3                1                1 
           41.25 41.4583333333333             41.5 41.5833333333333 
               1                1                1                1 
41.7083333333333            41.75 41.8333333333333 41.9583333333333 
               3                2                2                1 
              42 42.0833333333333           42.125 42.2083333333333 
               1                3                3                2 
           42.25 42.3333333333333 42.4583333333333             42.5 
               1                1                3                1 
42.7083333333333            42.75           42.875 42.9583333333333 
               1                9                4                1 
              43           43.125            43.25 43.3333333333333 
               1                2                2                1 
          43.375             43.5           43.625 43.7083333333333 
               2                2                1                1 
           43.75 43.9583333333333               44           44.125 
               4                2                3                2 
44.1666666666667 44.2083333333333            44.25 44.3333333333333 
               1                2                1                2 
          44.375 44.4583333333333           44.625 44.7083333333333 
               2                2                1                1 
44.8333333333333 45.2083333333333            45.25 45.3333333333333 
               1                1                2                1 
          45.375 45.4583333333333 45.5416666666667 45.5833333333333 
               4                1                1                3 
45.7083333333333           45.875             46.5 46.5416666666667 
               2                1                2                1 
46.5833333333333           46.625 46.7083333333333            46.75 
               3                1                2                2 
46.8333333333333 46.9583333333333 47.0416666666667           47.125 
               3                1                1                1 
47.2083333333333            47.25 47.4583333333333             47.5 
               1                2                2                3 
47.5833333333333 47.6666666666667 47.7083333333333 47.8333333333333 
               1                1                1                2 
          47.875 47.9583333333333               48           48.125 
               3                1                2                1 
48.2083333333333            48.25 48.2916666666667 48.3333333333333 
               6                1                1                1 
48.4583333333333             48.5 48.5416666666667 48.5833333333333 
               2                5                1                1 
48.8333333333333 48.9583333333333 49.0833333333333           49.125 
               5                2                1                2 
49.2083333333333            49.25           49.375 49.4583333333333 
               1                2                3                4 
            49.5           49.625 49.7083333333333           49.875 
               1                2                2                1 
49.9583333333333 50.0833333333333 50.1666666666667 50.2083333333333 
               1                3                1                1 
50.2916666666667 50.3333333333333           50.375 50.4583333333333 
               1                1                3                1 
50.5833333333333           50.625 50.7083333333333 50.7916666666667 
               1                1                1                2 
50.8333333333333           50.875 50.9166666666667 50.9583333333333 
               1                1                2                2 
51.0416666666667 51.0833333333333 51.2083333333333            51.25 
               1                1                2                3 
51.3333333333333           51.375 51.4583333333333 51.7083333333333 
               2                1                2                4 
           51.75 51.7916666666667 51.8333333333333 51.9166666666667 
               2                4                3                1 
              52 52.0416666666667 52.0833333333333           52.125 
               1                1                2                1 
52.1666666666667 52.2083333333333            52.25 52.3333333333333 
               1                1                1                1 
          52.375 52.4583333333333 52.5833333333333           52.625 
               1                2                3                1 
52.6666666666667 52.7083333333333            52.75 52.7916666666667 
               1                1                2                3 
          52.875 52.9583333333333               53 53.0416666666667 
               2                1                1                4 
53.0833333333333 53.1666666666667 53.2083333333333            53.25 
               3                1                4                1 
53.3333333333333 53.4583333333333             53.5 53.5416666666667 
               1                2                4                1 
53.5833333333333 53.7083333333333            53.75 53.7916666666667 
               4                1                1                1 
53.8333333333333           53.875 53.9583333333333           54.125 
               2                1                2                1 
54.1666666666667 54.2083333333333 54.2916666666667 54.3333333333333 
               1                2                4                1 
54.4166666666667 54.4583333333333 54.5833333333333           54.625 
               3                3                2                1 
54.6666666666667            54.75 54.7916666666667 54.8333333333333 
               1                1                2                3 
          54.875 54.9166666666667 54.9583333333333               55 
               2                3                1                2 
55.0833333333333 55.1666666666667 55.2083333333333            55.25 
               3                1                1                2 
55.2916666666667 55.4166666666667 55.4583333333333 55.7083333333333 
               2                1                2                3 
           55.75 55.7916666666667 55.8333333333333 55.9166666666667 
               1                5                2                1 
              56 56.0416666666667 56.0833333333333 56.1666666666667 
               1                1                1                2 
56.2083333333333            56.25 56.2916666666667 56.3333333333333 
               2                1                1                1 
          56.375 56.4583333333333 56.5416666666667 56.5833333333333 
               1                3                4                3 
          56.625 56.6666666666667 56.7083333333333            56.75 
               1                1                4                1 
56.8333333333333           56.875 56.9166666666667 56.9583333333333 
               2                1                2                3 
              57 57.0416666666667 57.0833333333333 57.1666666666667 
               1                3                2                2 
57.2083333333333 57.3333333333333 57.4166666666667 57.4583333333333 
               2                5                2                2 
57.5416666666667 57.5833333333333 57.6666666666667 57.7083333333333 
               3                1                1                2 
57.7916666666667 57.8333333333333 57.9166666666667 57.9583333333333 
               2                1                1                1 
              58 58.0833333333333           58.125 58.1666666666667 
               3                3                1                5 
58.2916666666667 58.3333333333333           58.375 58.4166666666667 
               2                2                1                1 
58.4583333333333 58.5416666666667 58.5833333333333 58.6666666666667 
               1                2                1                1 
58.7083333333333            58.75 58.7916666666667 58.8333333333333 
               2                1                5                5 
          58.875 58.9166666666667 58.9583333333333               59 
               4                2                2                1 
59.0416666666667 59.0833333333333            59.25 59.2916666666667 
               1                1                1                2 
59.3333333333333 59.4583333333333             59.5 59.5416666666667 
               1                2                1                4 
59.5833333333333 59.6666666666667 59.7916666666667 59.8333333333333 
               2                3                4                3 
59.9166666666667 59.9583333333333               60 60.0833333333333 
               1                1                1                2 
60.1666666666667 60.2083333333333 60.2916666666667           60.375 
               1                1                1                2 
60.4583333333333             60.5 60.5416666666667 60.5833333333333 
               2                4                1                1 
60.6666666666667            60.75 60.7916666666667 60.8333333333333 
               2                1                1                2 
          60.875 60.9583333333333 61.0416666666667 61.0833333333333 
               2                1                1                2 
          61.125 61.1666666666667 61.2083333333333            61.25 
               1                3                1                1 
61.2916666666667 61.3333333333333 61.4166666666667 61.4583333333333 
               2                1                1                3 
61.5416666666667 61.5833333333333 61.6666666666667 61.7083333333333 
               5                2                4                2 
61.7916666666667           61.875 61.9166666666667 61.9583333333333 
               4                3                4                2 
              62 62.0416666666667 62.0833333333333           62.125 
               3                3                2                2 
62.1666666666667 62.2083333333333            62.25 62.2916666666667 
               3                1                1                2 
          62.375 62.4166666666667             62.5 62.5416666666667 
               4                1                2                4 
62.5833333333333           62.625 62.6666666666667 62.7916666666667 
               2                4                3                1 
62.8333333333333           62.875 62.9166666666667               63 
               1                2                3                1 
63.0416666666667 63.0833333333333 63.1666666666667 63.2083333333333 
               2                3                6                1 
           63.25           63.375 63.4166666666667 63.4583333333333 
               1                3                1                1 
            63.5 63.5416666666667 63.5833333333333           63.625 
               1                1                1                3 
63.6666666666667            63.75 63.7916666666667           63.875 
               2                3                2                1 
63.9166666666667               64 64.0416666666667 64.0833333333333 
               3                1                4                1 
64.1666666666667 64.2083333333333            64.25 64.2916666666667 
               4                1                4                2 
          64.375 64.4166666666667 64.4583333333333             64.5 
               2                1                1                2 
64.5416666666667           64.625 64.6666666666667            64.75 
               3                3                4                3 
64.7916666666667 64.9166666666667 64.9583333333333               65 
               1                4                1                2 
65.0416666666667 65.0833333333333           65.125 65.1666666666667 
               3                1                1                5 
65.2083333333333            65.25 65.2916666666667 65.3333333333333 
               1                2                3                2 
          65.375 65.4166666666667 65.4583333333333             65.5 
               3                6                2                5 
65.5833333333333           65.625 65.6666666666667            65.75 
               1                8                5                3 
65.7916666666667 65.8333333333333           65.875 65.9166666666667 
               2                2                5                5 
              66 66.0416666666667           66.125 66.1666666666667 
               4                4                1                6 
           66.25 66.2916666666667           66.375 66.4166666666667 
               5                2                2                3 
66.4583333333333             66.5 66.5416666666667 66.5833333333333 
               1                6                1                1 
          66.625 66.6666666666667 66.7083333333333            66.75 
               2                2                3                4 
66.7916666666667 66.8333333333333           66.875 66.9166666666667 
               3                1                4                3 
66.9583333333333               67 67.0416666666667           67.125 
               1                4                3                4 
67.1666666666667            67.25 67.2916666666667           67.375 
               4                1                2                3 
67.4166666666667             67.5 67.5416666666667 67.5833333333333 
               4                3                1                1 
          67.625 67.6666666666667            67.75 67.7916666666667 
               6                4                3                1 
67.8333333333333           67.875 67.9166666666667               68 
               2                5                2                4 
68.0416666666667           68.125 68.1666666666667            68.25 
               3                4                3                2 
68.2916666666667           68.375 68.4166666666667             68.5 
               6                3                6                3 
68.5416666666667 68.5833333333333           68.625 68.6666666666667 
               3                1                3                6 
           68.75 68.7916666666667           68.875 68.9166666666667 
               5                3                2                3 
              69 69.0416666666667           69.125 69.1666666666667 
               5                2                4                1 
           69.25 69.2916666666667           69.375 69.4166666666667 
              11                5                9                2 
            69.5 69.5416666666667 69.5833333333333           69.625 
               3                4                1                7 
69.6666666666667            69.75 69.7916666666667           69.875 
               6                5                2                7 
69.9166666666667               70 70.0416666666667 70.0833333333333 
               2                4                1                1 
          70.125 70.1666666666667            70.25 70.2916666666667 
               5                5                5                3 
70.3333333333333           70.375 70.4166666666667             70.5 
               1                6                4                4 
70.5416666666667           70.625            70.75 70.7916666666667 
               3                6                8                1 
          70.875 70.9166666666667               71 71.0833333333333 
              14                1                3                1 
          71.125 71.1666666666667            71.25           71.375 
               6                1               11               13 
71.4166666666667             71.5 71.5416666666667           71.625 
               6                8                3                7 
71.6666666666667            71.75 71.7916666666667           71.875 
               3                8                4                7 
71.9166666666667               72           72.125 72.1666666666667 
               3               11                7                3 
           72.25 72.2916666666667           72.375 72.4166666666667 
              17                3                9                3 
            72.5 72.5416666666667           72.625 72.6666666666667 
               8                4                7                2 
           72.75           72.875 72.9166666666667               73 
               6               12                1                9 
73.0416666666667           73.125 73.1666666666667            73.25 
               5               16                2                7 
          73.375 73.4166666666667             73.5           73.625 
              18                7               13                9 
73.6666666666667            73.75 73.7916666666667           73.875 
               4               14                4                9 
73.9166666666667               74 74.0416666666667           74.125 
               4               13                2               10 
74.1666666666667            74.25           74.375 74.4166666666667 
               3               15               20                1 
            74.5           74.625 74.6666666666667            74.75 
              12               19                5               13 
74.7916666666667           74.875 74.9166666666667               75 
               1               10                2               19 
75.0416666666667           75.125 75.1666666666667            75.25 
               1                8                1               17 
          75.375 75.4166666666667             75.5           75.625 
              28                2               16               16 
75.6666666666667            75.75 75.7916666666667           75.875 
               5               19                1               20 
75.9166666666667               76 76.0416666666667           76.125 
               6               14                5               14 
76.1666666666667            76.25           76.375             76.5 
               1               18               15               13 
76.5416666666667           76.625 76.6666666666667            76.75 
               1               20                1               12 
          76.875 76.9166666666667               77           77.125 
              17                3               24               10 
77.1666666666667            77.25 77.2916666666667           77.375 
               1               19                2               24 
77.4166666666667             77.5           77.625            77.75 
               2               16               34               20 
          77.875 77.9166666666667               78           78.125 
              31                3               26                9 
78.1666666666667            78.25           78.375 78.4166666666667 
               5               18               14                2 
            78.5           78.625            78.75 78.7916666666667 
              29               27               33                1 
          78.875 78.9166666666667               79 79.0416666666667 
              37                2               32                1 
          79.125 79.1666666666667            79.25           79.375 
              16                1               39               17 
79.4166666666667             79.5 79.5416666666667           79.625 
               5               43                1               22 
79.6666666666667            79.75           79.875               80 
               1               57               46               37 
          80.125 80.1666666666667            80.25           80.375 
              35                1               31               10 
80.4166666666667             80.5           80.625 80.6666666666667 
               3               52               10                2 
           80.75 80.7916666666667           80.875               81 
              41                1               36               55 
          81.125            81.25           81.375 81.4166666666667 
              57               36               18                1 
            81.5           81.625            81.75           81.875 
              35               10               67               14 
              82           82.125 82.1666666666667            82.25 
              95               46                1               47 
          82.375 82.4166666666667             82.5           82.625 
              38                1               40                6 
82.6666666666667            82.75           82.875               83 
               1               71                9              127 
          83.125            83.25           83.375 83.4166666666667 
              28               76               37                1 
            83.5           83.625            83.75           83.875 
              39               16               64                5 
              84           84.125            84.25           84.375 
             129               17              152               26 
            84.5           84.625            84.75           84.875 
              36               15               43                5 
              85           85.125 85.1666666666667            85.25 
             107                3                1              195 
          85.375             85.5           85.625            85.75 
              15              119               22               37 
          85.875               86           86.125            86.25 
               5               57                2              186 
          86.375 86.4166666666667             86.5           86.625 
               3                1              233               14 
           86.75           86.875               87           87.125 
              55                8               24                2 
           87.25           87.375             87.5           87.625 
              84                1              275                3 
           87.75           87.875               88           88.125 
             136                6               33                6 
           88.25             88.5            88.75           88.875 
              39              162              238                7 
              89           89.125            89.25             89.5 
              61                4               21               64 
           89.75           89.875               90           90.125 
             201                2              110                5 
           90.25             90.5            90.75               91 
              14               21              102              161 
          91.125            91.25           91.375             91.5 
               4               48                1                4 
           91.75               92            92.25           92.375 
              38              116               89                1 
            92.5            92.75               93            93.25 
              11                2               63               88 
            93.5           93.625            93.75               94 
              29                1                1                1 
           94.25             94.5            94.75             95.5 
              80               48                1              118 
           95.75               96             96.5            96.75 
               5                1                4               18 
            97.5            97.75               98            98.75 
               1                2                2                1 
              99              100 
               1                4 
Code
# Apply the natural-log transform to the copy. The right-hand side reads
# from data_scale (the untouched original), not data_scale2 — this keeps
# the assignment idempotent if the chunk is re-run, since the raw scores
# are never overwritten. All SF_mh values are > 0 (minimum is 3 in the
# table above), so log() is well-defined.
data_scale2$SF_mh <- log(data_scale$SF_mh)
# Frequency table of the log-transformed scores
table(data_scale2$SF_mh)

1.09861228866811 2.12525107771113 2.42036812865043 2.47443534992071 
               1                1                1                1 
2.52572864430826 2.62103882411258 2.69968195143169 2.72457950305342 
               1                1                1                1 
2.74887219562247 2.78809290877575 2.82583323675859 2.84053938414829 
               1                1                1                2 
2.84781214347737 2.86220088092947  2.9041650800285  2.9109910450989 
               2                1                1                1 
2.95751106073379  2.9704144655697 2.99573227355399 3.02042488614436 
               2                1                1                2 
3.02650393222074 3.05045717324324 3.10234200861225 3.10608033072286 
               1                1                1                1 
3.11905548958599 3.13549421592915 3.18324864722505 3.18841661738349 
               3                1                1                1 
3.19867311755068 3.20376218705815  3.2088254890147 3.21386328304466 
               1                1                1                1 
3.23867845216438 3.24356843745857 3.24843462710975 3.25327725158553 
               2                1                3                1 
3.25649268843951 3.26766598903763 3.27241659179623 3.27714473299218 
               1                2                2                2 
3.28653447334202 3.30045581186062 3.30505352110925 3.31872115983792 
               1                2                2                2 
3.32323584019244  3.3322045101752 3.33961744256433 3.34109345759245 
               2                2                1                1 
3.34403896782221 3.34550847580157  3.3499040872746  3.3542804618744 
               1                1                2                2 
3.37587957367787  3.3886185994553 3.39142759006635  3.3998075273731 
               1                2                1                2 
3.40535539181082 3.40811782450673 3.40949618447685 3.41772668361337 
               2                1                1                2 
3.42588999425253 3.43398720448515 3.43801135478487 3.44201937618241 
               1                2                1                2 
3.44998754583159 3.45394794704768 3.46182200347859 3.46963454321538 
               3                1                4                3 
3.47351804324178 3.47609868983527  3.4773865200197 3.48124008933569 
               2                1                1                1 
3.48890296208126 3.49271249049793 3.50028828430639 3.50780711672041 
               3                1                2                2 
3.51154543883102 3.51402991215868   3.515269837922 3.51774508671055 
               3                1                2                1 
3.51898041731854 3.52267727919986 3.52513428289292 3.52636052461616 
               4                6                1                1 
3.53003025350512 3.53368656470823 3.53732955598674 3.54095932403731 
               1                1                5                1 
 3.5445759645075  3.5469798118189  3.5481795720108 3.55177024014153 
               2                2                1                1 
3.55534806148941 3.55891312765391 3.56128279700923 3.56246552925828 
               3                4                1                2 
3.56482680544396  3.5660053559634 3.56953269648137 3.57304763858881 
               1                2                2                1 
3.57655026914002 3.58583107821449  3.5869851464326 3.58928929491745 
               1                1                2                1 
3.59043938130068 3.59388172549166 3.60073106733723 3.60413822565885 
               2                1                7                1 
3.60753381465998 3.60979115196163 3.61316763237824 3.61429059712286 
               2                1                1                1 
3.62322920412367 3.62434093297637 3.62766872306904 3.63429126382953 
               1                1                4                1 
3.63648906691201 3.64414356027254 3.64740620590736 3.64957540415491 
               1                1                2                4 
3.65065824129374 3.65282040429823 3.65389973521791 3.65713075579936 
               2                1                1                1 
3.65927898433765  3.6603513704994 3.66249269894074 3.66356164612965 
               1                2                1                1 
3.66676164886032 3.66888930923743 3.66995144422842  3.6731310971458 
               1                1                1                1 
3.67630067190708 3.67840815424664 3.67946023219744 3.68260984110034 
               2                1                2                2 
3.68574956110501 3.69096062031776 3.69199958145018 3.69407427099104 
               1                1                1                1 
3.69717825692863 3.69821078154282 3.70130197411249 3.70335747329459 
               2                1                1                1 
 3.7043836406499 3.70643282169484 3.70745583968687 3.71051862921742 
               1                1                3                2 
3.71357206670431 3.71661620908554 3.71762886739992 3.71864050127477 
               2                3                1                1 
3.71965111278069 3.72468890681065 3.72569342723665 3.72769944596352 
               1                1                1                1 
3.73070094896727 3.73169945129686 3.73369346990373 3.73667706237062 
               3                2                2                1 
3.73766961828337 3.73965177948736 3.74064138867253 3.74261767390074 
               1                3                3                2 
3.74360435380318 3.74557479779048 3.74852320287478 3.74950407593037 
               1                1                3                1 
3.75439406122456 3.75536919538277  3.7582889054861 3.76023065366901 
               1                9                4                1 
3.76120011569356 3.76410287535152 3.76699723337789 3.76892216178747 
               1                2                2                1 
3.76988323826702 3.77276093809464 3.77563038052259 3.77753877804835 
               2                2                1                1 
3.77849161280362 3.78324221556222 3.78418963391826 3.78702651525346 
               4                2                3                2 
3.78797035675817 3.78891330826604 3.78985537145394 3.79173683955364 
               1                2                1                2 
3.79267624779558 3.79455242095381  3.7982942400998 3.80015991228275 
               2                2                1                1 
3.80295191037378 3.81128143562661 3.81220267014594 3.81404259706794 
               1                1                2                1 
3.81496129258501 3.81679615548513 3.81862765782859 3.81954215263398 
               4                1                1                3 
3.82228062992728 3.82592030637473 3.83945231259331 3.84034796872126 
               2                1                2                1 
 3.8412428233671 3.84213687796398 3.84392259272421  3.8448142557347 
               3                1                2                2 
3.84659520010569 3.84926068369183 3.85103373380172 3.85280364576817 
               3                1                1                1 
3.85457043068006 3.85545265393975 3.85985213309924  3.8607297110406 
               1                2                2                3 
3.86248255986801  3.8642323415918 3.86510608564039 3.86772274653157 
               1                1                1                2 
3.86859344750081 3.87033257837394 3.87120101090789 3.87380179260795 
               3                1                2                1 
3.87553189684573 3.87639582778499 3.87725901299181 3.87812145375246 
               6                1                1                1 
3.88070432217072 3.88156379794344 3.88242253565186 3.88328053656249 
               2                5                1                1 
3.88841313978901 3.89096959623031 3.89351953386359 3.89436807018943 
               5                2                1                2 
3.89606298584942  3.8969093676181 3.89944422322129 3.90113056426172 
               1                2                3                4 
3.90197266957464 3.90449473900735 3.90617259174997 3.90951987521003 
               1                2                2                1 
3.91118932467957 3.91368828474721 3.91535079552082 3.91618101557681 
               1                3                1                1 
3.91783939074959 3.91866754814681 3.91949502026685 3.92114791320515 
               1                1                3                1 
 3.9236221412715  3.9244455254267 3.92609026263958 3.92773229913333 
               1                1                1                2 
3.92855230737936 3.92937164376276 3.93019030938359 3.93100830533923 
               1                1                2                2 
3.93264229263088 3.93345828614821 3.93590227921809 3.93671561801852 
               1                1                2                3 
3.93834031374552  3.9391516728164 3.94077241871413 3.94561895485666 
               2                1                2                4 
3.94642443214548 3.94722926116277 3.94803344295118 3.94963986899945 
               2                4                3                1 
3.95124371858143 3.95204467977763  3.9528449999484 3.95364468011897 
               1                1                2                1 
 3.9544437213121 3.95524212454812 3.95603989084492  3.9576335166802 
               1                1                1                1 
 3.9584293782423  3.9600192036964 3.96239921275321 3.96319129200255 
               1                2                3                1 
3.96398274435886 3.96477357081368 3.96556377235618 3.96635334997319 
               1                1                2                3 
3.96793063736644 3.96950544084151 3.97029191355212 3.97107776820946 
               2                1                1                4 
3.97186300578416 3.97343163355679 3.97421502568459 3.97499780458953 
               3                1                4                1 
3.97656152656572 3.97890253426769 3.97968165390196 3.98046016698137 
               1                2                4                1 
3.98123807444962 3.98356817259124 3.98434366700777  3.9851185604987 
               4                1                1                1 
 3.9858928539946 3.98666654842391 3.98821214378569 3.99129618632265 
               2                1                2                1 
3.99206571310168 3.99283464816456  3.9943707467769 3.99513791213865 
               1                2                4                1 
3.99667047948843 3.99743588327628 3.99972858584725 4.00049165341575 
               3                3                2                1 
4.00125413915609 4.00277736869661 4.00353811426392 4.00429828153732 
               1                1                2                3 
4.00505787139534 4.00581688471451 4.00657532236937 4.00733318523247 
               2                3                1                2 
4.00884719006369 4.01035890614901 4.01111390807238 4.01186834039786 
               3                1                1                2 
4.01262220398426 4.01488039086785 4.01563198804717   4.020129746754 
               2                1                2                3 
4.02087741034023 4.02162451534323 4.02237106259701 4.02386248718368 
               1                5                2                1 
4.02535169073515 4.02609546168799 4.02683867985673 4.02832346112431 
               1                1                1                2 
4.02906502585981 4.02980604108453 4.03054650761225 4.03128642625496 
               2                1                1                1 
4.03202579782284 4.03350290296586 4.03497782948692  4.0357144777707 
               1                3                4                3 
 4.0364505838032 4.03718614838215 4.03792117230352 4.03865565636151 
               1                1                4                1 
4.04012300805546 4.04085587727111 4.04158820978279   4.042320006376 
               2                1                2                3 
4.04305126783455  4.0437819949405 4.04451218847422 4.04597097793788 
               1                3                2                2 
4.04669957542003 4.04888218814534 4.05033462122566 4.05106004744536 
               2                5                2                2 
4.05250932306135 4.05323317397967 4.05467930582967 4.05540158827349 
               3                1                1                2 
4.05684458996689  4.0575653107188 4.05900519577679  4.0597243615755 
               2                1                1                1 
4.06044301054642 4.06187876097252 4.06259586390752 4.06331245297437 
               3                3                1                5 
4.06545914431754  4.0661736852554 4.06688771598906 4.06760123724659 
               2                2                1                1 
4.06831424975451  4.0697387514199 4.07045024202266 4.07187170637004 
               1                2                1                1 
4.07258168155073 4.07329115302427 4.07400012150487 4.07470858770524 
               2                1                5                5 
4.07541655233658 4.07612401610857 4.07683097972939 4.07753744390572 
               4                2                2                1 
4.07824340934273 4.07894887674413 4.08176578001524 4.08246876774191 
               1                1                1                2 
4.08317126162398 4.08527578712889 4.08597631255158 4.08667634758192 
               1                2                1                4 
4.08737589290601 4.08877351717264 4.09086629784578 4.09156291926022 
               2                3                4                3 
4.09295470793305 4.09364987653942  4.0943445622221 4.09573248749695 
               1                1                1                2 
4.09711848910483 4.09781077019859 4.09919389628354 4.10057511197274 
               1                1                1                2 
4.10195442253624  4.1026433650368 4.10333183322234 4.10401982774552 
               2                4                1                1 
4.10539439840869 4.10676708222066 4.10745271817484 4.10813788435444 
               2                1                1                2 
4.10882258140275 4.11019057067218 4.11155669110322 4.11223905209865 
               2                1                1                2 
4.11292094779504 4.11360237882652 4.11428334582593 4.11496384942484 
               1                3                1                1 
4.11564389025349 4.11632346894088 4.11768124240134 4.11835943842597 
               2                1                1                3 
4.11971445218343  4.1203912711602 4.12174353641022 4.12241898391985 
               5                2                4                2 
4.12376851178999 4.12511622088885 4.12578939492976 4.12646211611221 
               4                3                4                2 
4.12713438504509 4.12780620233606 4.12847756859156 4.12914848441679 
               3                3                2                2 
4.12981895041576 4.13048896719125 4.13115853534482 4.13182765547684 
               3                1                1                2 
4.13316455407168 4.13383233372922 4.13516655674236 4.13583300128552 
               4                1                2                4 
4.13649900197613 4.13716455940503 4.13782967416184 4.13982236827855 
               2                4                3                1 
4.14048571821996  4.1411486284199 4.14181109946102 4.14313472639153 
               1                2                3                1 
4.14379588344041 4.14445660364945 4.14577673585437 4.14643614900059 
               2                3                6                1 
4.14709512760763 4.14906946191135 4.14972670807369 4.15038352254722 
               1                3                1                1 
4.15103990589865 4.15169585869357 4.15235138149646 4.15300647487069 
               1                1                1                3 
4.15366113937852 4.15496918403854 4.15562256530974 4.15692804852387 
               2                3                2                1 
4.15758015157926 4.15888308335967 4.15953391319065 4.16018431971764 
               3                1                4                1 
4.16148386505973 4.16213300497217 4.16278172377533 4.16343002201522 
               4                1                4                2 
 4.1647253589839 4.16537239879942 4.16601902022512 4.16666522380173 
               2                1                1                2 
4.16731101006892 4.16860133282859 4.16924587039522 4.17053370057965 
               3                3                4                3 
4.17117699426539 4.17310439608275 4.17374603870983 4.17438726989564 
               1                4                1                2 
4.17502809016749 4.17566850005169 4.17630850007353 4.17694809075731 
               3                1                1                5 
4.17758727262631  4.1782260462028 4.17886441200807 4.17950237056241 
               1                2                3                2 
4.18013992238509 4.18077706799441 4.18141380790768 4.18205014264121 
               3                6                2                5 
 4.1833215986294 4.18395672091179 4.18459144006988 4.18585967105787 
               1                8                5                3 
 4.1864931839077 4.18712629567307 4.18775900686153 4.18839131797965 
               2                2                5                5 
4.18965474202643 4.19028585596344 4.19154689017846 4.19217681145914 
               4                4                1                6 
4.19343546486633 4.19406419798984  4.1953204795621 4.19594802900221 
               5                2                2                3 
  4.196575184871 4.19720194766181 4.19782831786707 4.19845429597827 
               1                6                1                1 
4.19907988248601 4.19970507787993 4.20032988264877 4.20095429728036 
               2                2                3                4 
4.20157832226161 4.20220195807851 4.20282520521617 4.20344806415876 
               3                1                4                3 
4.20407053538957 4.20469261939097 4.20531431664444 4.20655655282903 
               1                4                3                4 
4.20717709271863 4.20841701848195 4.20903640530881 4.21027402922916 
               4                1                2                3 
4.21089226727049 4.21212759787848 4.21274469138773 4.21336140432741 
               4                3                1                1 
4.21397773716665 4.21459369037368 4.21582445975981 4.21643927687109 
               6                4                3                1 
4.21705371621454  4.2176677782541 4.21828146345286 4.21950770517611 
               2                5                2                4 
4.22012026262252 4.22134425298341 4.22195568681475 4.22317743406507 
               3                4                3                2 
4.22378774839588 4.22500726074214 4.22561645966443 4.22683374526818 
               6                3                6                3 
4.22744183285153 4.22804955088907 4.22865689982969 4.22926388012147 
               3                1                3                6 
4.23047673654668 4.23108261357218 4.23229326747308 4.23289804523569 
               5                3                2                3 
4.23410650459726 4.23471018707862  4.2359164598425 4.23651905100264 
               5                2                4                1 
4.23772314506745 4.23832464884498  4.2395265720666 4.24012699237884 
              11                5                9                2 
4.24132675257075 4.24192609331389 4.24252507506286 4.24312369824745 
               3                4                1                7 
 4.2437219632967 4.24491742070147 4.24551461391122 4.24670793147526 
               6                5                2                7 
4.24730405667921 4.24849524204936 4.24909030306067 4.24968501018495 
               2                4                1                1 
4.25027936384286 4.25087336445433 4.25206030821386 4.25265325219802 
               5                5                5                3 
4.25324584480796 4.25383808645985 4.25442997756917 4.25561270981822 
               1                6                4                4 
4.25620355178519 4.25738418946661 4.25915253652335 4.25974129132399 
               3                6                8                1 
4.26091776204792 4.26150547878537 4.26267987704132 4.26385289770368 
              14                1                3                1 
4.26443889244649 4.26502454400057 4.26619481914876 4.26794766797617 
               6                1               11               13 
4.26853126880978 4.26969744969996 4.27028003054953  4.2714441750349 
               6                8                3                7 
4.27202573945955 4.27318785463973 4.27376840617998 4.27492849911751 
               3                8                4                7 
4.27550804129543 4.27666611901606 4.27840072482826 4.27897825877443 
               3               11                7                3 
4.28013232699254 4.28070886203301 4.28186093589316 4.28243647547739 
              17                3                9                3 
4.28358656186063 4.28416110942024 4.28530921517208 4.28588277412098 
               8                4                7                2 
 4.2870289060516 4.28874564467066 4.28931723656961 4.29045944114839 
               6               12                1                9 
4.29103005457329 4.29217030555202 4.29273994384712 4.29387824789718 
               5               16                2                7 
4.29558327814826 4.29615097614818 4.29728540621879 4.29898464197175 
              18                7               13                9 
4.29955041284964 4.30068099521993 4.30124580743489 4.30237447572626 
               4               14                4                9 
4.30293833252158 4.30406509320417 4.30462799780671 4.30575285731789 
               4               13                2               10 
4.30631481293818 4.30743777768281 4.30911986386579  4.3096799310885 
               3               15               20                1 
4.31079912538551 4.31247557171277 4.31303376318693  4.3141492122708 
              12               19                5               13 
4.31470647057443 4.31582005643561 4.31637638468362 4.31748811353631 
               1               10                2               19 
4.31804351482801 4.31915339285537 4.31970787027462 4.32081590362899 
               1                8                1               17 
4.32247565504735 4.32302829391193 4.32413265625498 4.32578691635101 
              28                2               16               16 
4.32633772881329 4.32743844438948 4.32798834817018 4.32908724937966 
               5               19                1               20 
4.32963624747196 4.33073334028633 4.33128143566865 4.33237672603006 
               6               14                5               14 
4.33292392166615 4.33401741548752 4.33565541749176 4.33729074083249 
               1               18               15               13 
4.33783525486718 4.33892339425638 4.33946702025509 4.34055338646731 
               1               20                1               12 
4.34218072612668 4.34272258471485 4.34380542185368 4.34542748222555 
              17                3               24               10 
4.34596758485818 4.34704691577786 4.34758614469359 4.34866373100476 
               1               19                2               24 
4.34920208902584  4.3502779363593 4.35188954025364 4.35349855105934 
               2               16               34               20 
4.35510497710762 4.35563987950069 4.35670882668959 4.35831010805657 
              31                3               26                9 
4.35884329921822 4.35990882942026 4.36150499895308 4.36203648979738 
               5               18               14                2 
4.36309862478836  4.3646897150206 4.36627827770574 4.36680723831051 
              29               27               33                1 
4.36786432086138 4.36839244339808 4.36944785246702 4.36997513958707 
              37                2               32                1 
4.37102888046434 4.37155533480659 4.37260741275739 4.37418345721286 
              16                1               39               17 
 4.3747082538662 4.37575702166029  4.3762809933778 4.37732811389233 
               5               43                1               22 
 4.3778512632634 4.37889674166495  4.3804629126977 4.38202663467388 
               1               57               46               37 
4.38358791524083 4.38410780087771 4.38514676201012 4.38670318255778 
              35                1               31               10 
4.38722145155099 4.38825718442452 4.38980877511594 4.39032543748858 
               3               52               10                2 
4.39135796210277 4.39187382489471 4.39290475282106 4.39444915467244 
              41                1               36               55 
4.39599117502425 4.39753082120985 4.39906810052873 4.39958000225478 
              57               36               18                1 
4.40060302024682 4.40213558759659 4.40366580977736 4.40519369395542 
              35               10               67               14 
4.40671924726425 4.40824247680477 4.40874970481464 4.40976338964548 
              95               46                1               47 
4.41128199282267 4.41178768183471 4.41279829334063 4.41431229817185 
              38                1               40                6 
4.41481645749687 4.41582401425717 4.41733344850603  4.4188406077966 
               1               71                9              127 
4.42034549897602 4.42184812886055 4.42334850423579 4.42384812952722 
              28               76               37                1 
4.42484663185681 4.42634251844839 4.42783617070518 4.42932759529185 
              39               16               64                5 
4.43081679884331 4.43230378796489 4.43378856923247 4.43527114919269 
             129               17              152               26 
4.43675153436313 4.43822973123244 4.43970574626056 4.44117958587886 
              36               15               43                5 
4.44265125649032 4.44412076446968 4.44461012097565 4.44558811616363 
             107                3                1              195 
4.44705331789095 4.44851637594271 4.44997729658239 4.45143608604605 
              15              119               22               37 
4.45289275054251 4.45434729625351 4.45579972933382 4.45725005591147 
               5               57                2              186 
4.45869828208783 4.45918055844153 4.46014441393783 4.46158845751007 
               3                1              233               14 
4.46303041882697 4.46447030388496 4.46590811865458 4.46734386908069 
              55                8               24                2 
4.46877756108254 4.47020920055397 4.47163879336357 4.47306634535475 
              84                1              275                3 
4.47449186234598 4.47591535013083 4.47733681447821 4.47875626113243 
             136                6               33                6 
4.48017369581341 4.48300255201388 4.48582342835553 4.48723088812341 
              39              162              238                7 
4.48863636973214 4.49003987873446 4.49144142065975 4.49423862528081 
              61                4               21               64 
4.49702802736839 4.49841981604121 4.49980967033027 4.50119759560511 
             201                2              110                5 
4.50258359721299 4.50534985070588 4.50810847314496 4.51085950651685 
              14               21              102              161 
4.51223219032882  4.5136029924626 4.51497191806994 4.51633897228148 
               4               48                1                4 
4.51906748693468 4.52178857704904 4.52450228292064 4.52585637926837 
              38              116               89                1 
4.52720864451838 4.52990770148754 4.53259949315326 4.53528405852393 
              11                2               63               88 
4.53796143629464 4.53929744183738 4.54063166485052    4.54329478227 
              29                1                1                1 
4.54595082632812  4.5485998344997 4.55124184396254 4.55912624748668 
              80               48                1              118 
4.56174062806076 4.56434819146784 4.56954300834494 4.57213033190989 
               5                1                4               18 
 4.5798523780038 4.58241319886548 4.58496747867057 4.59259140378123 
               1                2                2                1 
4.59511985013459 4.60517018598809 
               1                4 
Code
# Linear model for mental health (SF_mh) on the log scale for
# premenopausal females, adjusting for the (mean-centered) age, weight
# and height plus the intervention-exposure category.
SF_mh_preF_factor_log <- lm(
  log(SF_mh) ~ Age + Weight + Height + as.factor(Intervention_months_factor),
  data = data_scale,
  subset = data_scale$Gender == "Female" & data_scale$PostMeno == 0
)

# Normal Q-Q plot of the residuals (plot.lm, which = 2).
plot(SF_mh_preF_factor_log, which = 2)

Code
# Formal normality check of the log-model residuals (Shapiro-Wilk).
shapiro.test(residuals(SF_mh_preF_factor_log))

    Shapiro-Wilk normality test

data:  resid(SF_mh_preF_factor_log)
W = 0.70988, p-value < 2.2e-16
Code
## The log transform does not fix non-normality and many outliers remain;
## a robust regression is fitted in a later chunk instead.

# Multicollinearity check: generalized VIFs for the untransformed model —
# none exceed the usual cut-offs of 5 or 10.
car::vif(SF_mh_preF)
                                          GVIF Df GVIF^(1/(2*Df))
Age                                   1.066108  1        1.032525
Weight                                1.207108  1        1.098685
Height                                1.140604  1        1.067991
as.factor(Intervention_months_factor) 1.003858  4        1.000481

Postmenopausal females:

Code
#### Postmenopausal females
### LINEARITY (linearity and constant-variance assumptions)
# Residuals-vs-fitted plot: linearity looks poor and the spread is
# uneven, suggesting heteroscedasticity.
plot(SF_mh_postF, 1)

Code
### HOMOGENEITY OF VARIANCE
# Scale-location (spread-location) plot: both linearity and constant
# variance look poor.
plot(SF_mh_postF, 3)

Code
# Formal Breusch-Pagan test: the small p-value rejects the null of
# homoscedasticity, so the constant-variance assumption is violated.
bptest(SF_mh_postF)

    studentized Breusch-Pagan test

data:  SF_mh_postF
BP = 22.92, df = 7, p-value = 0.00176
Code
### NORMALITY OF RESIDUALS
# Q-Q plot of the residuals (plot.lm, which = 2): departure from the
# reference line makes normality suspect.
plot(SF_mh_postF, which = 2)

Code
# Formal Shapiro-Wilk test: rejects the null that the residuals are
# sampled from a normal distribution.
shapiro.test(residuals(SF_mh_postF))

    Shapiro-Wilk normality test

data:  resid(SF_mh_postF)
W = 0.80728, p-value < 2.2e-16
Code
### OUTLIERS
# Cook's distance plot, labelling the 3 most influential observations.
plot(SF_mh_postF, 4, id.n = 3)

Code
# Inspect the 3 observations with the largest Cook's distance; these
# deserve closer attention.
# Fix: `top_n()` is superseded in dplyr — `slice_max()` is the current
# equivalent (and additionally returns the rows ordered by .cooksd);
# `row_number()` replaces the manual `1:n()` index.
model.data <- augment(SF_mh_postF) %>%
  mutate(index = row_number())
model.data %>% slice_max(.cooksd, n = 3)
# A tibble: 3 × 13
  .rownames SF_mh Age[,1] Weight[,1] Height[,1] as.factor(Intervention…¹ .fitted
  <chr>     <dbl>   <dbl>      <dbl>      <dbl> <fct>                      <dbl>
1 3296       13.8   11.0       19.8       14.2  0                           80.2
2 5278       29.7    5.98      36.8        4.24 2                           76.7
3 6670       22.6    3.98       1.81      -9.76 4                           77.8
# ℹ abbreviated name: ¹​`as.factor(Intervention_months_factor)`
# ℹ 6 more variables: .resid <dbl>, .hat <dbl>, .sigma <dbl>, .cooksd <dbl>,
#   .std.resid <dbl>, index <int>
Code
# Plot standardized residuals against observation index, coloured by the
# outcome value.
# Fix: the original passed `almha = .5`, a typo for `alpha = .5`;
# geom_point() ignores the unknown parameter, so the points were drawn
# fully opaque instead of semi-transparent.
ggplot(model.data, aes(index, .std.resid)) +
  geom_point(aes(color = SF_mh), alpha = .5) +
  theme_bw()

Code
# Flag potentially influential observations: absolute standardized
# residual greater than 4.
filter(model.data, abs(.std.resid) > 4)
# A tibble: 9 × 13
  .rownames SF_mh Age[,1] Weight[,1] Height[,1] as.factor(Intervention…¹ .fitted
  <chr>     <dbl>   <dbl>      <dbl>      <dbl> <fct>                      <dbl>
1 514        27.1    7.98      -6.19       6.24 0                           80.9
2 1423       25.8    4.98     -13.2       -6.76 2                           79.7
3 1438       19.2    8.98      11.8       -5.76 2                           78.7
4 1689       27.2    3.98      -4.19      -1.76 0                           78.8
5 3296       13.8   11.0       19.8       14.2  0                           80.2
6 3954       33.1   17.0       -2.19     -16.8  2                           80.9
7 4753       28.2    4.98       8.81      -8.76 0                           77.2
8 5844       31.5   11.0      -12.2      -17.8  0                           79.8
9 6670       22.6    3.98       1.81      -9.76 4                           77.8
# ℹ abbreviated name: ¹​`as.factor(Intervention_months_factor)`
# ℹ 6 more variables: .resid <dbl>, .hat <dbl>, .sigma <dbl>, .cooksd <dbl>,
#   .std.resid <dbl>, index <int>
Code
# Cook's distance for every observation used in the model.
cooksd <- cooks.distance(SF_mh_postF)
# Threshold 3 * (k + 1) / n (Stevens, 2002), with k predictors and n the
# number of observations the model actually used.
# Fixes: `<-` instead of `=` for assignment; the original took
# n <- nrow(data_scale), which also counts rows outside this subset; and
# the horizontal threshold line described in the comment was never drawn.
k <- length(coef(SF_mh_postF)) - 1
n <- length(cooksd)
plot(cooksd, main = "Cooks Distance for Influential Obs")
abline(h = 3 * (k + 1) / n, col = "red", lty = 2)

Code
# Log-transform the outcome to try to achieve normality of residuals.
# Fix: the original also wrapped the covariates in log1p(), but
# Age/Weight/Height were mean-centered above, so log1p() returns NaN for
# any value below -1 and lm() silently drops those rows. Only the
# outcome is transformed here, mirroring the premenopausal model.
SF_mh_postF_factor_fix <- lm(
  log1p(SF_mh) ~ Age + Weight + Height + Intervention_months_factor,
  data = data_scale,
  subset = data_scale$Gender == "Female" & data_scale$PostMeno == 1
)

# Q-Q plot of the residuals of the transformed model.
qqnorm(resid(SF_mh_postF_factor_fix), col = "grey")
qqline(resid(SF_mh_postF_factor_fix), col = "dodgerblue", lwd = 2)

Code
# Formal normality check of the transformed model's residuals.
shapiro.test(residuals(SF_mh_postF_factor_fix))

    Shapiro-Wilk normality test

data:  resid(SF_mh_postF_factor_fix)
W = 0.65258, p-value = 1.188e-15
Code
## The transformation does not fix non-normality and many outliers
## remain; a robust regression is fitted in a later chunk instead.

# Multicollinearity check: generalized VIFs for the untransformed model —
# none exceed the usual cut-offs of 5 or 10.
car::vif(SF_mh_postF)
                                          GVIF Df GVIF^(1/(2*Df))
Age                                   1.035224  1        1.017459
Weight                                1.147491  1        1.071210
Height                                1.172199  1        1.082681
as.factor(Intervention_months_factor) 1.011478  4        1.001428

Males:

Code
### MALE
### LINEARITY (linearity and constant variance assumptions)
# Fitted-versus-residuals plot: good (no clear trend in the residuals)
plot(SF_mh_M, which = 1)

Code
### HOMOGENEITY OF VARIANCE
# Display scale-location plot, also known as the spread-location plot
plot(SF_mh_M, which = 3) # suspect: spread may vary with fitted values

Code
# formal test in the form of Breusch-Pagan 
bptest(SF_mh_M)# small p-value, so we reject the null of homoscedasticity. The constant variance assumption is violated.

    studentized Breusch-Pagan test

data:  SF_mh_M
BP = 24.332, df = 7, p-value = 0.0009959
Code
### NORMALITY OF RESIDUALS
#QQ plot
plot(SF_mh_M, 2) # suspect

Code
# formal test in the form of Shapiro-Wilk
shapiro.test(resid(SF_mh_M)) # reject for the data sampled from normal

    Shapiro-Wilk normality test

data:  resid(SF_mh_M)
W = 0.7886, p-value < 2.2e-16
Code
### OUTLIERS
#plot 3largest
plot(SF_mh_M, which = 4, id.n = 3)

Code
# inspect the largest
model.data <- augment(SF_mh_M) %>% 
  mutate(index = 1:n())
model.data %>% top_n(3, .cooksd) # residuals are above 3, does deserve closer attention
# A tibble: 3 × 13
  .rownames SF_mh Age[,1] Weight[,1] Height[,1] as.factor(Intervention…¹ .fitted
  <chr>     <dbl>   <dbl>      <dbl>      <dbl> <fct>                      <dbl>
1 3057       35.1   13.0        71.8       2.24 4                           81.2
2 4049       19.5  -22.0        56.8       5.24 1                           76.0
3 6804        3     -2.02       29.8       5.24 4                           80.0
# ℹ abbreviated name: ¹​`as.factor(Intervention_months_factor)`
# ℹ 6 more variables: .resid <dbl>, .hat <dbl>, .sigma <dbl>, .cooksd <dbl>,
#   .std.resid <dbl>, index <int>
Code
# Plot standardized residuals against observation index, colored by the outcome.
ggplot(model.data, aes(index, .std.resid)) + 
  geom_point(aes(color = SF_mh), alpha = .5) +  # fixed typo: 'almha' -> 'alpha' (ggplot2 silently ignored the unknown parameter)
  theme_bw()

Code
# Filter potential influential data point
model.data %>% 
  filter(abs(.std.resid) > 3) # some influential data points
# A tibble: 90 × 13
   .rownames SF_mh Age[,1] Weight[,1] Height[,1] as.factor(Interventio…¹ .fitted
   <chr>     <dbl>   <dbl>      <dbl>      <dbl> <fct>                     <dbl>
 1 423        39.6   -9.02     -18.2        3.24 2                          79.8
 2 500        39.2   -4.02      -2.19      18.2  0                          82.0
 3 501        42.5  -14.0        1.81       3.24 0                          79.1
 4 581        38.5  -12.0       -7.19       1.24 0                          79.5
 5 662        31     24.0       16.8        1.24 0                          84.9
 6 759        38.2   12.0       -6.19       3.24 0                          83.7
 7 856        42.8    7.98      -6.19       3.24 0                          83.0
 8 897        31.9   -9.02      66.8       23.2  2                          79.1
 9 959        37.6   -5.02      13.8        8.24 0                          80.7
10 974        41.2   15.0       17.8        7.24 0                          83.8
# ℹ 80 more rows
# ℹ abbreviated name: ¹​`as.factor(Intervention_months_factor)`
# ℹ 6 more variables: .resid <dbl>, .hat <dbl>, .sigma <dbl>, .cooksd <dbl>,
#   .std.resid <dbl>, index <int>
Code
#Cook's distance
cooksd = cooks.distance(SF_mh_M)
# Plot Cook's Distance with a horizontal line at K+1/n to see which observations exceed this threshold (* 3, see Stevens, 2002)
n <- nrow(data_scale)
plot(cooksd, main = "Cooks Distance for Influential Obs")
abline(h = (8/n) * 3, lty = 2, col = "steelblue") # add cutoff line

Code
# Refit on the intended MALE subset to re-check residual normality.
# BUG FIX: the original fitted this on data_scale2 (defined only later in the
# file) with subset Female & PostMeno == 1 -- a copy-paste from the female chunk.
# The log transform used for females is not applied here; it does not combine
# cleanly with the mean-centered covariates (log of negatives).
SF_mh_Mfactor_fix <- lm(SF_mh ~ Age + Weight + Height + as.factor(Intervention_months_factor),
                     data = data_scale,
                     subset = data_scale$Gender == "Male")

qqnorm(resid(SF_mh_Mfactor_fix), col = "grey")
qqline(resid(SF_mh_Mfactor_fix), col = "dodgerblue", lwd = 2)

Code
shapiro.test(resid(SF_mh_Mfactor_fix))

    Shapiro-Wilk normality test

data:  resid(SF_mh_Mfactor_fix)
W = 0.68271, p-value < 2.2e-16
Code
## Transforming does not seem to help. Many outliers. Solution -> robust regression, see chunk below.


#multicollinearity
car::vif(SF_mh_M) # no VIF value that exceeds 5 or 10 
                                          GVIF Df GVIF^(1/(2*Df))
Age                                   1.093016  1        1.045474
Weight                                1.274398  1        1.128892
Height                                1.251832  1        1.118853
as.factor(Intervention_months_factor) 1.007197  4        1.000897
Decision

We conclude that the assumptions were violated and thus we use robust models.

Robust models

Code
# Robust regressions (MM-estimation via robustbase::lmrob) for mental health,
# one model per stratum; chosen because OLS assumptions were violated above.
SF_mh_preF_robust <- lmrob(SF_mh ~ Age + Weight + Height + as.factor(Intervention_months_factor) , data = data_scale, subset  = data_scale$Gender == "Female" & data_scale$PostMeno == 0)
SF_mh_postF_robust <- lmrob(SF_mh ~ Age + Weight + Height + as.factor(Intervention_months_factor) , data = data_scale, subset = data_scale$Gender == "Female" & data_scale$PostMeno == 1)
SF_mh_Mrobust <- lmrob(SF_mh ~ Age + Weight + Height + as.factor(Intervention_months_factor), data = data_scale, subset = data_scale$Gender == "Male")


# Predictor labels for tab_model; keys match the as.factor(...) coefficient
# names produced by these formulas (confirmed by the labeled output table).
pl <- c(
  `(Intercept)` = "Intercept",
  `as.factor(Intervention_months_factor)1` = "6-12 months",
  `as.factor(Intervention_months_factor)2` = "12-18 months",
  `as.factor(Intervention_months_factor)3` = "18-24 months",
  `as.factor(Intervention_months_factor)4` = "24+ months"
  
)


# Side-by-side regression table for the three strata.
tab_model(SF_mh_preF_robust, SF_mh_postF_robust, SF_mh_Mrobust, pred.labels = pl, dv.labels = c("Pre-menopausal women", "Post-menopausal women", "Men"), title = "Robust: Mental health (Sf-36), higher score indicates better health", show.reflvl = T)
Robust: Mental health (Sf-36), higher score indicates better health
  Pre-menopausal women Post-menopausal women Men
Predictors Estimates CI p Estimates CI p Estimates CI p
Intercept 83.37 82.56 – 84.18 <0.001 82.70 81.68 – 83.72 <0.001 84.61 84.16 – 85.06 <0.001
Age 0.13 0.08 – 0.18 <0.001 0.13 0.07 – 0.18 <0.001 0.11 0.09 – 0.13 <0.001
Weight -0.00 -0.04 – 0.03 0.896 -0.05 -0.08 – -0.01 0.014 -0.02 -0.04 – 0.01 0.140
Height 0.02 -0.05 – 0.08 0.591 0.05 -0.01 – 0.12 0.104 0.05 0.01 – 0.09 0.008
6-12 months -1.25 -2.25 – -0.24 0.016 -0.25 -1.29 – 0.79 0.638 0.10 -0.56 – 0.75 0.771
12-18 months -0.04 -1.01 – 0.92 0.929 0.30 -0.75 – 1.34 0.579 -0.46 -1.13 – 0.22 0.184
18-24 months -1.29 -2.90 – 0.32 0.117 -1.37 -3.16 – 0.42 0.133 -0.16 -1.10 – 0.79 0.745
24+ months -0.98 -2.14 – 0.18 0.098 0.06 -1.13 – 1.24 0.925 -0.34 -1.02 – 0.34 0.330
Observations 2404 1476 3595
R2 / R2 adjusted 0.019 / 0.016 0.022 / 0.017 0.058 / 0.056

Warm glow

Warm glow is defined by a score of >= 6 on average on warm glow questions (3 questions).

Predict by ferritin

Code
# Barplot of warm glow counts/percentages within each ferritin cut-off group.
data_scale_per2 <- data_scale %>%
 group_by(Fergroup) %>% 
  count(Warmglow) %>%
  mutate(
    Percentage = round(proportions(n) * 100, 1),
    # Fixed label: percent sign belongs inside the parentheses, i.e. "n (12.3%)"
    # (original produced "n(12.3)%").
    res = str_c(n, " (", Percentage, "%)")
    )

ggplot(subset(data_scale_per2, !is.na(Warmglow)), aes(Fergroup, Percentage, fill = Warmglow)) +
  geom_col(position = "dodge") + 
  geom_text(aes(label = res), vjust = -0.2)+ 
  theme_bw()

Code
ggplot(data_scale , mapping = aes(x = Fergroup, fill = Warmglow)) +
  geom_bar(position = "dodge") + 
  theme_bw()

Code
# t-test + boxplot: compare mean ferritin between warm-glow groups.
# NOTE(review): var.equal = TRUE forces the pooled-variance t-test; the Welch
# default (var.equal = FALSE) is more conservative -- confirm this choice.
res <- t.test(Ferritin ~ Warmglow, data = data_scale, var.equal = TRUE)
res

    Two Sample t-test

data:  Ferritin by Warmglow
t = -0.48007, df = 6602, p-value = 0.6312
alternative hypothesis: true difference in means between group No and group Yes is not equal to 0
95 percent confidence interval:
 -2.511109  1.523153
sample estimates:
 mean in group No mean in group Yes 
         37.39408          37.88806 
Code
# Boxplot of ferritin by warm glow, annotated with group means and the t-test.
# NOTE(review): `means` is not defined in this chunk -- presumably a table of
# group means computed elsewhere; verify it exists before rendering.
ggplot(subset(data_scale, !is.na(Warmglow)), aes(x=Warmglow, y=Ferritin)) +
  geom_boxplot() + 
  theme_bw() +
  geom_text(x = 2.3, y = 700, label=paste('Means:', "  ", means$Ferritin[1], "  ", means$Ferritin[2], "\n", 't(',res$parameter, '), p = ', round(res$p.value, 3), sep = '')) +
  labs(x="Warm glow", y="Ferritin")

Models

Code
# Logistic regressions for warm glow (yes/no), one model per stratum.
WG_preF <- glm(Warmglow ~ Age + Weight + Height + Intervention_months_factor , data = data_scale, subset = data_scale$Gender == "Female" & data_scale$PostMeno == 0,family = binomial)
WG_postF <- glm(Warmglow ~ Age + Weight + Height + Intervention_months_factor , data = data_scale, subset = data_scale$Gender == "Female" & data_scale$PostMeno == 1,family = binomial)
WG_M <- glm(Warmglow ~ Age + Weight + Height + Intervention_months_factor , data = data_scale, subset = data_scale$Gender == "Male" ,family = binomial)

# FIX: the label keys must match the coefficient names of THESE models. The
# formulas use Intervention_months_factor directly (no as.factor() wrapper), so
# the terms are named Intervention_months_factor1 ... 4. The original keys kept
# the as.factor() prefix and never matched, which is why the rendered table
# showed raw coefficient names instead of the month-range labels.
pl <- c(
  `(Intercept)` = "Intercept",
  `Intervention_months_factor1` = "6-12 months",
  `Intervention_months_factor2` = "12-18 months",
  `Intervention_months_factor3` = "18-24 months",
  `Intervention_months_factor4` = "24+ months"
)

tab_model(WG_preF, WG_postF, WG_M, pred.labels = pl, dv.labels = c("Pre-menopausal women", "Post-menopausal women", "Men"), title = "Warmglow, yes/no", show.reflvl = T)
Warmglow, yes/no
  Pre-menopausal women Post-menopausal women Men
Predictors Odds Ratios CI p Odds Ratios CI p Odds Ratios CI p
Intercept 1.57 1.27 – 1.95 <0.001 1.58 1.18 – 2.11 0.002 1.53 1.35 – 1.73 <0.001
Age 1.03 1.02 – 1.04 <0.001 1.01 1.00 – 1.03 0.108 1.02 1.02 – 1.03 <0.001
Weight 1.00 0.99 – 1.01 0.954 0.99 0.98 – 1.00 0.265 1.01 1.00 – 1.01 0.030
Height 1.00 0.99 – 1.02 0.596 0.99 0.97 – 1.01 0.320 1.00 0.99 – 1.01 0.761
Intervention_months_factor1 0.99 0.77 – 1.26 0.911 0.86 0.63 – 1.18 0.355 0.83 0.67 – 1.02 0.078
Intervention_months_factor2 1.41 1.11 – 1.80 0.005 0.80 0.60 – 1.07 0.135 0.97 0.79 – 1.19 0.767
Intervention_months_factor3 1.03 0.75 – 1.40 0.866 0.43 0.27 – 0.68 <0.001 0.72 0.55 – 0.95 0.019
Intervention_months_factor4 1.06 0.81 – 1.39 0.655 0.90 0.62 – 1.34 0.605 0.81 0.65 – 1.00 0.053
Observations 2167 1403 3386
R2 Tjur 0.016 0.014 0.030

Assumptions

Premenopausal females:

Code
### OUTLIERS
#Cook's distance
#plot 3largest
plot(WG_preF, which = 4, id.n = 3)

Code
# inspect the largest
model.data <- augment(WG_preF) %>% 
  mutate(index = 1:n())
model.data %>% top_n(3, .cooksd) # residuals not above 3, does not deserve closer attention
# A tibble: 3 × 13
  .rownames Warmglow Age[,1] Weight[,1] Height[,1] Intervention_months_factor
  <chr>     <fct>      <dbl>      <dbl>      <dbl> <fct>                     
1 275       No        -6.02        49.8       5.24 2                         
2 4031      No         0.985       36.8       1.24 3                         
3 5873      Yes      -24.0         48.8      -4.76 1                         
# ℹ 7 more variables: .fitted <dbl>, .resid <dbl>, .hat <dbl>, .sigma <dbl>,
#   .cooksd <dbl>, .std.resid <dbl>, index <int>
Code
# Plot Cook's Distance with a horizontal line at K+1/n to see which observations exceed this threshold (* 3, see Stevens, 2002)
cooksd = cooks.distance(WG_preF)
n <- nrow(data_scale)
plot(cooksd, main = "Cooks Distance for Influential Obs")
abline(h = (8/n) * 3, lty = 2, col = "steelblue") # add cutoff line

Code
# Influence plot: studentized residuals vs hat values, point size ~ Cook's distance
car::influencePlot(WG_preF,col="red",id.n=3)

       StudRes         Hat       CookD
275  -1.465201 0.012464664 0.003020593
339   1.203187 0.015112249 0.002036705
386  -1.550097 0.004144052 0.001205968
405  -1.552705 0.003995963 0.001169521
5873  1.301589 0.017863410 0.003021551
Code
car::influenceIndexPlot(WG_preF, id.n=3)

Code
#multicollinearity
car::vif(WG_preF) # no VIF value that exceeds 5 or 10 
                               GVIF Df GVIF^(1/(2*Df))
Age                        1.066877  1        1.032897
Weight                     1.207036  1        1.098652
Height                     1.139041  1        1.067259
Intervention_months_factor 1.002697  4        1.000337

Postmenopausal females:

Code
### OUTLIERS
#Cook's distance
#plot 3largest
plot(WG_postF, which = 4, id.n = 3)

Code
# inspect the largest
model.data <- augment(WG_postF) %>% 
  mutate(index = 1:n())
model.data %>% top_n(3, .cooksd) # residuals not above 3, does not deserve closer attention
# A tibble: 3 × 13
  .rownames Warmglow Age[,1] Weight[,1] Height[,1] Intervention_months_factor
  <chr>     <fct>      <dbl>      <dbl>      <dbl> <fct>                     
1 1032      Yes         2.98       57.8      -2.76 1                         
2 2675      No         11.0        55.8       1.24 0                         
3 5303      No         25.0        38.8      -7.76 2                         
# ℹ 7 more variables: .fitted <dbl>, .resid <dbl>, .hat <dbl>, .sigma <dbl>,
#   .cooksd <dbl>, .std.resid <dbl>, index <int>
Code
#Cook's distance
cooksd = cooks.distance(WG_postF)
# Plot Cook's Distance with a horizontal line at K+1/n to see which observations exceed this threshold (* 3, see Stevens, 2002)
n <- nrow(data_scale)
plot(cooksd, main = "Cooks Distance for Influential Obs")
abline(h = (8/n) * 3, lty = 2, col = "steelblue") # add cutoff line

Code
# Influence plot: studentized residuals vs hat values, point size ~ Cook's distance
car::influencePlot(WG_postF,col="red",id.n=3)

        StudRes         Hat       CookD
1032  1.1722690 0.030003491 0.003821410
2675 -1.3078974 0.023885123 0.004119193
4404  0.7155689 0.043294905 0.001676371
5494 -1.6515737 0.004852339 0.001766253
6518 -1.6384692 0.004893419 0.001730525
Code
car::influenceIndexPlot(WG_postF, id.n=3)

Code
#multicollinearity
car::vif(WG_postF) # no VIF value that exceeds 5 or 10 
                               GVIF Df GVIF^(1/(2*Df))
Age                        1.036025  1        1.017853
Weight                     1.159329  1        1.076721
Height                     1.180711  1        1.086605
Intervention_months_factor 1.018630  4        1.002310
Code
### OUTLIERS
#Cook's distance
#plot 3largest
plot(WG_M, which = 4, id.n = 3)

Code
# inspect the largest
model.data <- augment(WG_M) %>% 
  mutate(index = 1:n())
model.data %>% top_n(3, .cooksd) # residuals not above 3, does not deserve closer attention
# A tibble: 3 × 13
  .rownames Warmglow Age[,1] Weight[,1] Height[,1] Intervention_months_factor
  <chr>     <fct>      <dbl>      <dbl>      <dbl> <fct>                     
1 897       No         -9.02      66.8       23.2  2                         
2 1294      No         -9.02      -3.19     -56.8  2                         
3 1510      No         25.0       41.8       -6.76 2                         
# ℹ 7 more variables: .fitted <dbl>, .resid <dbl>, .hat <dbl>, .sigma <dbl>,
#   .cooksd <dbl>, .std.resid <dbl>, index <int>
Code
# Plot standardized residuals
ggplot(model.data, aes(index, .std.resid)) + 
  geom_point(aes(color = Warmglow), alpha = .5) +
  theme_bw()

Code
# Filter potential influential data point
model.data %>% 
  filter(abs(.std.resid) > 3) # see zero influential data points
# A tibble: 0 × 13
# ℹ 13 variables: .rownames <chr>, Warmglow <fct>, Age <dbl[,1]>,
#   Weight <dbl[,1]>, Height <dbl[,1]>, Intervention_months_factor <fct>,
#   .fitted <dbl>, .resid <dbl>, .hat <dbl>, .sigma <dbl>, .cooksd <dbl>,
#   .std.resid <dbl>, index <int>
Code
#Cook's distance
cooksd = cooks.distance(WG_M)
# Plot Cook's Distance with a horizontal line at K+1/n to see which observations exceed this threshold (* 3, see Stevens, 2002)
n <- nrow(data_scale)
plot(cooksd, main = "Cooks Distance for Influential Obs")
abline(h = (8/n) * 3, lty = 2, col = "steelblue") # add cutoff line

Code
# Influence plot: studentized residuals vs hat values, point size ~ Cook's distance
car::influencePlot(WG_M,col="red",id.n=3)

       StudRes         Hat        CookD
897  -1.500435 0.008894675 0.0023248247
1294 -1.229454 0.032856980 0.0047869881
1510 -1.712966 0.005574479 0.0023231165
5998  0.853050 0.011983516 0.0006676027
8023 -1.704294 0.002536407 0.0010373587
Code
car::influenceIndexPlot(WG_M, id.n=3)

Code
#multicollinearity
car::vif(WG_M) # no VIF value that exceeds 5 or 10 
                               GVIF Df GVIF^(1/(2*Df))
Age                        1.086787  1        1.042491
Weight                     1.263387  1        1.124005
Height                     1.249615  1        1.117862
Intervention_months_factor 1.005663  4        1.000706

Fatigue

Predict by ferritin

Code
# Pearson correlation between fatigue (CIS) and ferritin, annotated on a scatterplot.
# NOTE(review): cor.test() has no 'use' argument (that belongs to cor()); it is
# silently absorbed by '...'. cor.test already restricts to complete pairs.
res <- cor.test(data_scale$CIS_Totalmean, data_scale$Ferritin,  method = "pearson", use = "complete.obs")
ggplot(data_scale, mapping = aes(x = CIS_Totalmean, y = Ferritin)) + 
  geom_point() + 
  theme_bw() +
  geom_text(x = 120, y = 1000, label=paste('r(',res$parameter,') = ', round(res$estimate,3),', p = ', round(res$p.value, 3), sep = '')) +
  labs(x="Fatigue, higher is worse", y="Ferritin") # axis label fixed: "Ferritine" -> "Ferritin", consistent with the rest of the report

Code
# plot for cutoffs
# NOTE: Fergroup was already derived with this exact case_when/fct_recode in the
# data-cleaning chunk; the recomputation here is redundant but harmless.
data_scale <- data_scale %>% mutate(Fergroup = factor(case_when(Ferritin < 15 ~ 1, Ferritin >= 15 & Ferritin <= 30~ 2, Ferritin > 30 ~ 3)) %>%  fct_recode("Ferritin < 15" = "1", "Ferritin 15-30" = "2", "Ferritin > 30" = "3"))

# Boxplot of fatigue across the three ferritin cut-off groups.
ggplot(data_scale, aes(x=Fergroup, y=CIS_Totalmean)) +
  geom_boxplot() + 
  theme_bw() +
  labs(x="Ferritin cut-off groups", y="Fatigue, higher is worse")

Models

Code
# Linear regressions for fatigue (CIS total), one model per stratum.
CIS_preF <- lm(CIS_Totalmean  ~ Age + Weight + Height + Intervention_months_factor, data = data_scale, subset = data_scale$Gender == "Female" & data_scale$PostMeno == 0)
CIS_postF <- lm(CIS_Totalmean  ~ Age + Weight + Height + Intervention_months_factor, data = data_scale, subset = data_scale$Gender == "Female" & data_scale$PostMeno == 1)
CIS_M <- lm(CIS_Totalmean  ~ Age + Weight + Height + Intervention_months_factor, data = data_scale, subset = data_scale$Gender == "Male")

# FIX: keys must match these models' coefficient names. The formulas use
# Intervention_months_factor without an as.factor() wrapper, so the terms are
# named Intervention_months_factor1 ... 4; the original as.factor(...)-prefixed
# keys never matched and the table showed raw names.
pl <- c(
  `(Intercept)` = "Intercept",
  `Intervention_months_factor1` = "6-12 months",
  `Intervention_months_factor2` = "12-18 months",
  `Intervention_months_factor3` = "18-24 months",
  `Intervention_months_factor4` = "24+ months"
)

# FIX: in the original call a misplaced closing quote swallowed
# `show.reflvl = T, digits = 3` into the title string, so those arguments were
# never passed (the rendered title literally contained them).
tab_model(CIS_preF, CIS_postF, CIS_M, pred.labels = pl, dv.labels = c("Pre-menopausal women", "Post-menopausal women", "Men"), title = "Fatigue, higher score is worse", show.reflvl = T, digits = 3)
Fatigue, higher score is worse, show.reflvl = T, digits = 3
  Pre-menopausal women Post-menopausal women Men
Predictors Estimates CI p Estimates CI p Estimates CI p
Intercept 83.46 82.81 – 84.11 <0.001 84.63 83.76 – 85.50 <0.001 85.17 84.81 – 85.53 <0.001
Age 0.09 0.05 – 0.12 <0.001 0.07 0.02 – 0.12 0.003 0.09 0.08 – 0.10 <0.001
Weight 0.01 -0.01 – 0.03 0.460 0.02 -0.01 – 0.05 0.201 0.02 0.00 – 0.04 0.039
Height -0.02 -0.07 – 0.02 0.275 -0.07 -0.12 – -0.01 0.019 -0.00 -0.03 – 0.03 0.861
Intervention_months_factor1 -0.32 -1.05 – 0.41 0.392 -0.16 -1.11 – 0.78 0.737 -0.13 -0.74 – 0.47 0.665
Intervention_months_factor2 0.66 -0.07 – 1.39 0.077 0.18 -0.71 – 1.08 0.684 -0.64 -1.21 – -0.06 0.030
Intervention_months_factor3 -0.52 -1.43 – 0.40 0.266 0.14 -1.26 – 1.54 0.845 -0.86 -1.67 – -0.06 0.035
Intervention_months_factor4 -0.57 -1.35 – 0.22 0.160 -0.33 -1.48 – 0.82 0.574 -0.38 -1.00 – 0.23 0.224
Observations 2360 1471 3558
R2 / R2 adjusted 0.016 / 0.013 0.012 / 0.007 0.054 / 0.052

Assumptions

Premenopausal females:

Code
### LINEARITY (Linearity and constant variance assumptions)
#Fitted versus Residuals Plot: Linearity good,  constant variance (heteroscedasticity)
plot(CIS_preF, which = 1)

Code
### HOMOGEINITY OF VARIANCE
# Display scale-location plot (also known as  spread-location plot)
plot(CIS_preF, which = 3) # Linearity good,  constant variance 

Code
# formal test in the form of Breusch-Pagan 
bptest(CIS_preF) #  The constant variance assumption is not violated.

    studentized Breusch-Pagan test

data:  CIS_preF
BP = 11.086, df = 7, p-value = 0.1349
Code
### NORMALITY OF RESIDUALS
#QQ plot
plot(CIS_preF, 2) #suspect

Code
# formal test in the form of Shapiro-Wilk
shapiro.test(resid(CIS_preF)) # p = 3e-16: we REJECT normality of the residuals (original comment said "fail to reject", contradicting the printed p-value)

    Shapiro-Wilk normality test

data:  resid(CIS_preF)
W = 0.98288, p-value = 2.994e-16
Code
### OUTLIERS
#plot 3largest
plot(CIS_preF, which = 4, id.n = 3)

Code
# inspect the largest
model.data <- augment(CIS_preF) %>% 
  mutate(index = 1:n())
model.data %>% top_n(3, .cooksd) # residuals are above 3, does deserve closer attention
# A tibble: 3 × 13
  .rownames CIS_Totalmean Age[,1] Weight[,1] Height[,1] Intervention_months_fa…¹
  <chr>             <dbl>   <dbl>      <dbl>      <dbl> <fct>                   
1 3740                122   -8.02      -2.19     -6.76  3                       
2 6127                 60  -15.0       31.8      -0.760 1                       
3 7695                107  -23.0       -9.19     -6.76  3                       
# ℹ abbreviated name: ¹​Intervention_months_factor
# ℹ 7 more variables: .fitted <dbl>, .resid <dbl>, .hat <dbl>, .sigma <dbl>,
#   .cooksd <dbl>, .std.resid <dbl>, index <int>
Code
# Plot standardized residuals
ggplot(model.data, aes(index, .std.resid)) + 
  geom_point(aes(color = CIS_Totalmean), alpha = .5) +
  theme_bw()

Code
# Filter potential influential data point
model.data %>% 
  filter(abs(.std.resid) > 3) # some influential data points
# A tibble: 15 × 13
   .rownames CIS_Totalmean Age[,1] Weight[,1] Height[,1] Intervention_months_f…¹
   <chr>             <dbl>   <dbl>      <dbl>      <dbl> <fct>                  
 1 3006                 61  -13.0      -17.2      -6.76  4                      
 2 3213                 64   -2.02       8.81    -13.8   0                      
 3 3514                 61  -17.0      -15.2     -11.8   4                      
 4 3740                122   -8.02      -2.19     -6.76  3                      
 5 4126                 58  -10.0        1.81      1.24  0                      
 6 5337                 64   -3.02     -18.2     -11.8   0                      
 7 5578                 62  -17.0       -9.19     -3.76  4                      
 8 5674                101  -23.0        8.81      2.24  0                      
 9 6127                 60  -15.0       31.8      -0.760 1                      
10 6282                104  -14.0      -18.2      -1.76  2                      
11 6357                 61  -24.0      -17.2      -7.76  2                      
12 6859                 55  -16.0      -18.2      -8.76  3                      
13 7026                 58   -6.02     -13.2      -8.76  0                      
14 7239                 59  -24.0      -24.2     -11.8   0                      
15 7695                107  -23.0       -9.19     -6.76  3                      
# ℹ abbreviated name: ¹​Intervention_months_factor
# ℹ 7 more variables: .fitted <dbl>, .resid <dbl>, .hat <dbl>, .sigma <dbl>,
#   .cooksd <dbl>, .std.resid <dbl>, index <int>
Code
#Cook's distance
cooksd = cooks.distance(CIS_preF)
# Plot Cook's Distance with a horizontal line at K+1/n to see which observations
#exceed this threshold
n <- nrow(data_scale)
plot(cooksd, main = "Cooks Distance for Influential Obs")
abline(h = (8/n) * 3, lty = 2, col = "steelblue") # add cutoff line

Code
# Check a log transform to improve normality; work on a copy so the original
# fatigue scores in data_scale remain untouched.
data_scale2 <- data_scale
table(data_scale2$CIS_Totalmean)

              33               50               55               58 
               1                1                1                2 
              59               60               61               62 
               3                1                4                5 
              63 63.1578947368421               64               65 
               1                1               10               10 
65.4545454545455 65.5555555555556               66               67 
               1                1               20               10 
67.2727272727273 67.3684210526316               68 68.4210526315789 
               1                1               33                2 
              69 69.4736842105263               70 70.5263157894737 
              47                1               30                2 
              71 71.5789473684211               72 72.6315789473684 
              60                5               70                2 
              73 73.6842105263158               74 74.1176470588235 
              85               10              114                1 
74.7368421052632               75 75.2941176470588 75.7894736842105 
               6              140                1                5 
              76 76.8421052631579               77 77.7777777777778 
             162                7              186                1 
77.8947368421053               78 78.8235294117647 78.8888888888889 
              14              238                1                1 
78.9473684210526               79               80               81 
              11              246              301              342 
81.0526315789474 81.1111111111111 81.1764705882353            81.25 
               9                1                1                1 
              82 82.1052631578947 82.2222222222222               83 
             362               27                1              467 
83.1578947368421 83.3333333333333 83.5294117647059               84 
              40                1                1              489 
84.2105263157895 84.4444444444444 84.7058823529412               85 
              25                2                1              525 
85.2631578947368               86 86.3157894736842 86.6666666666667 
              30              773               20                5 
              87 87.3684210526316             87.5               88 
             542               27                1              517 
 88.421052631579 88.8888888888889               89 89.4736842105263 
              12                3              398               17 
              90 90.5263157894737 90.5882352941177               91 
             299               22                1              249 
91.1111111111111  91.578947368421               92 92.2222222222222 
               2                6              217                1 
92.6315789473684               93 93.3333333333333 93.6842105263158 
               9              152                2                9 
              94 94.1176470588235 94.7368421052632               95 
              97                1                6               96 
95.5555555555556 95.7894736842105               96 96.6666666666667 
               1                1               64                1 
96.8421052631579               97 97.8947368421053               98 
               7               35                6               38 
98.8888888888889 98.9473684210526               99              100 
               1                2               29                8 
             101 101.052631578947              102 102.105263157895 
              16                3                6                1 
             103              104 104.210526315789              105 
               7                4                1                1 
             106              107              108 108.421052631579 
               4                3                3                2 
             110 110.526315789474 112.631578947368              114 
               2                1                1                3 
             115              116 116.842105263158              117 
               2                4                1                2 
             119              121              122              123 
               1                1                1                1 
             125              128 128.888888888889              131 
               1                2                1                1 
             140 
               1 
Code
# Overwrite the copy's fatigue score with its log10.
# NOTE(review): earlier fix chunks used log1p; here log10 is used -- confirm the
# intended transform (values are all > 0, so both are defined).
data_scale2$CIS_Totalmean <- log10(data_scale$CIS_Totalmean)
table(data_scale2$CIS_Totalmean)

1.51851393987789 1.69897000433602 1.74036268949424 1.76342799356294 
               1                1                1                2 
1.77085201164214 1.77815125038364 1.78532983501077 1.79239168949825 
               3                1                4                5 
1.79934054945358  1.8004276450948 1.80617997398389 1.81291335664286 
               1                1               10               10 
1.81593981127304 1.81660950220282 1.81954393554187 1.82607480270083 
               1                1               20               10 
1.82783903457275 1.82845636869504 1.83250891270624 1.83518975135401 
               1                1               33                2 
1.83884909073726 1.84182033025302 1.84509804001426 1.84835119741198 
              47                1               30                2 
1.85125834871908 1.85478530741739 1.85733249643127 1.86112548544841 
              60                5               70                2 
1.86332286012046 1.86737443472541 1.86923171973098 1.86992162373929 
              85               10              114                1 
1.87353474343023  1.8750612633917 1.87676104826959 1.87960889114242 
               6              140                1                5 
1.88081359228079 1.88559925483161 1.88649072517248 1.89085553057493 
             162                7              186                1 
1.89150811444213 1.89209460269048 1.89665587698653 1.89701583927975 
              14              238                1                1 
1.89733765810285 1.89762709129044 1.90308998699194 1.90848501887865 
              11              246              301              342 
1.90876711988363 1.90908035068113 1.90943016502296 1.90982336965091 
               9                1                1                1 
1.91381385238372 1.91437099740163 1.91498921029165 1.91907809237607 
             362               27                1              467 
1.91990348600159 1.92081875395238 1.92183942300478 1.92427928606188 
              40                1                1              489 
 1.9253663817031 1.92657108284147 1.92791357071698 1.92941892571429 
              25                2                1              525 
 1.9307614135898 1.93449845124357 1.93609024709487 1.93785209325116 
              30              773               20                5 
1.93951925261862 1.94135448708723 1.94200805302231 1.94448267215017 
             542               27                1              517 
1.94655568077303 1.94884747755262 1.94939000664491 1.95169532042545 
              12                3              398               17 
1.95424250943932 1.95677484595472 1.95707179945819 1.95904139232109 
             299               22                1              249 
1.95957134294439 1.96179564732977 1.96378782734556 1.96483558293675 
               2                6              217                1 
1.96675906686132 1.96848294855394 1.97003677662256 1.97166640135606 
               9              152                2                9 
 1.9731278535997 1.97367106127765 1.97651890415048 1.97772360528885 
              97                1                6               96 
1.98025594180424 1.98131778703225 1.98227123303957 1.98527674317929 
               1                1               64                1 
1.98606422205671 1.98677173426624 1.99075934326509 1.99122607569249 
               7               35                6               38 
1.99514749720559 1.99540424831085 1.99563519459755                2 
               1                2               29                8 
2.00432137378264 2.00454762775072 2.00860017176192  2.0090481289774 
              16                3                6                1 
2.01283722470517 2.01703333929878  2.0179115893087 2.02118929906994 
               7                4                1                1 
2.02530586526477 2.02938377768521 2.03342375548695 2.03511361941632 
               4                3                3                2 
2.04139268515822 2.04346569378109 2.05166017239636 2.05690485133647 
               2                1                1                3 
2.06069784035361 2.06445798922692 2.06759937349781 2.06818586174616 
               2                4                1                2 
2.07554696139253 2.08278537031645 2.08635983067475  2.0899051114394 
               1                1                1                1 
2.09691001300806 2.10720996964787 2.11021547978759 2.11727129565576 
               1                2                1                1 
2.14612803567824 
               1 
Code
# Analysis with log-transformed data: re-fit the fatigue (CIS) model for
# pre-menopausal women on data_scale2.
# NOTE(review): the outcome here is CIS_Totalmean, not an explicitly
# log-transformed column — presumably data_scale2 already holds transformed
# values; confirm against the chunk that builds data_scale2.
CIS_preF_factor_fix <- lm(CIS_Totalmean~ Age + Weight + Height + as.factor(Intervention_months_factor), 
                     data = data_scale2, 
                     subset = data_scale2$Gender == "Female" & data_scale2$PostMeno == 0)

# QQ plot of the residuals to re-check the normality assumption after re-fitting.
qqnorm(resid(CIS_preF_factor_fix), col = "grey")
qqline(resid(CIS_preF_factor_fix), col = "dodgerblue", lwd = 2)

Code
# Formal normality test (Shapiro-Wilk) on the re-fitted model's residuals.
shapiro.test(resid(CIS_preF_factor_fix))

    Shapiro-Wilk normality test

data:  resid(CIS_preF_factor_fix)
W = 0.97017, p-value < 2.2e-16
Code
## Transforming does not seem to help (residuals still non-normal) and there
## are many outliers. Solution -> robust regression, see chunk below.

# Multicollinearity check on the original pre-menopausal model (CIS_preF).
car::vif(CIS_preF) # no GVIF value exceeds the usual cutoffs of 5 or 10
                               GVIF Df GVIF^(1/(2*Df))
Age                        1.066081  1        1.032512
Weight                     1.204323  1        1.097417
Height                     1.137514  1        1.066543
Intervention_months_factor 1.003466  4        1.000433

Postmenopausal females:

Code
### LINEARITY (linearity and constant variance assumptions)
# Fitted-versus-residuals plot: linearity ok, constant variance looks ok
# (no obvious heteroscedasticity).
plot(CIS_postF, which = 1)

Code
### HOMOGENEITY OF VARIANCE
# Scale-location plot (also known as the spread-location plot).
plot(CIS_postF, which = 3) # constant variance ok

Code
# Formal test in the form of Breusch-Pagan (lmtest::bptest).
bptest(CIS_postF) # p = 0.1253 (output below): we fail to reject the null of homoscedasticity, so the constant variance assumption is not violated.

    studentized Breusch-Pagan test

data:  CIS_postF
BP = 11.319, df = 7, p-value = 0.1253
Code
### NORMALITY OF RESIDUALS
# QQ plot of residuals.
plot(CIS_postF, 2) #suspect

Code
# Formal test in the form of Shapiro-Wilk.
# NOTE(review): the original comment said "fail to reject", but the printed
# p-value below is < 2.2e-16, so normality of the residuals IS rejected.
shapiro.test(resid(CIS_postF))

    Shapiro-Wilk normality test

data:  resid(CIS_postF)
W = 0.94034, p-value < 2.2e-16
Code
### OUTLIERS
# Plot the 3 largest Cook's distances.
plot(CIS_postF, which = 4, id.n = 3)

Code
# Inspect the largest: augment() (broom) attaches fitted values, residuals,
# leverage and Cook's distance per observation.
model.data <- augment(CIS_postF) %>% 
  mutate(index = 1:n())
model.data %>% top_n(3, .cooksd) # largest Cook's distances; these observations deserve closer attention
# A tibble: 3 × 13
  .rownames CIS_Totalmean Age[,1] Weight[,1] Height[,1] Intervention_months_fa…¹
  <chr>             <dbl>   <dbl>      <dbl>      <dbl> <fct>                   
1 1208                140   12.0        6.81     -13.8  1                       
2 6761                123    9.98      -2.19     -13.8  4                       
3 7896                128    6.98     -18.2       -4.76 4                       
# ℹ abbreviated name: ¹​Intervention_months_factor
# ℹ 7 more variables: .fitted <dbl>, .resid <dbl>, .hat <dbl>, .sigma <dbl>,
#   .cooksd <dbl>, .std.resid <dbl>, index <int>
Code
# Plot standardized residuals against observation index, colored by outcome.
ggplot(model.data, aes(index, .std.resid)) + 
  geom_point(aes(color = CIS_Totalmean), alpha = .5) +
  theme_bw()

Code
# Filter potential influential data points (|standardized residual| > 4).
model.data %>% 
  filter(abs(.std.resid) > 4) # some influential data points
# A tibble: 8 × 13
  .rownames CIS_Totalmean Age[,1] Weight[,1] Height[,1] Intervention_months_fa…¹
  <chr>             <dbl>   <dbl>      <dbl>      <dbl> <fct>                   
1 1208                140   12.0       6.81      -13.8  1                       
2 1606                116   11.0      -0.189     -10.8  3                       
3 1881                116   19.0       4.81       -8.76 2                       
4 2120                 59    9.98    -13.2        -6.76 0                       
5 6229                115    4.98    -18.2       -16.8  0                       
6 6761                123    9.98     -2.19      -13.8  4                       
7 7035                116   22.0     -13.2         1.24 1                       
8 7896                128    6.98    -18.2        -4.76 4                       
# ℹ abbreviated name: ¹​Intervention_months_factor
# ℹ 7 more variables: .fitted <dbl>, .resid <dbl>, .hat <dbl>, .sigma <dbl>,
#   .cooksd <dbl>, .std.resid <dbl>, index <int>
Code
# Cook's distance for every observation in the post-menopausal model.
cooksd = cooks.distance(CIS_postF)
# Plot Cook's distance with a horizontal cutoff line.
# NOTE(review): the comment mentioned a (K+1)/n threshold, but the line drawn
# below is (8/n) * 3 — with 8 coefficients this is closer to 3*K/n; confirm
# which rule of thumb was intended.
n <- nrow(data_scale)
plot(cooksd, main = "Cooks Distance for Influential Obs")
abline(h = (8/n) * 3, lty = 2, col = "steelblue") # add cutoff line

Code
# Analysis with log-transformed data: re-fit the fatigue (CIS) model for
# post-menopausal women on data_scale2.
# NOTE(review): the outcome here is CIS_Totalmean, not an explicitly
# log-transformed column — presumably data_scale2 already holds transformed
# values; confirm against the chunk that builds data_scale2.
CIS_postF_factor_fix <- lm(CIS_Totalmean~ Age + Weight + Height + as.factor(Intervention_months_factor), 
                     data = data_scale2, 
                     subset = data_scale2$Gender == "Female" & data_scale2$PostMeno == 1)

# QQ plot of the residuals to re-check the normality assumption after re-fitting.
qqnorm(resid(CIS_postF_factor_fix), col = "grey")
qqline(resid(CIS_postF_factor_fix), col = "dodgerblue", lwd = 2)

Code
# Formal normality test (Shapiro-Wilk) on the re-fitted model's residuals.
shapiro.test(resid(CIS_postF_factor_fix))

    Shapiro-Wilk normality test

data:  resid(CIS_postF_factor_fix)
W = 0.95649, p-value < 2.2e-16
Code
## Transforming does not seem to help (residuals still non-normal) and there
## are many outliers. Solution -> robust regression, see chunk below.

# Multicollinearity check on the original post-menopausal model (CIS_postF).
car::vif(CIS_postF) # no GVIF value exceeds the usual cutoffs of 5 or 10
                               GVIF Df GVIF^(1/(2*Df))
Age                        1.035449  1        1.017570
Weight                     1.149008  1        1.071918
Height                     1.173224  1        1.083155
Intervention_months_factor 1.012342  4        1.001534

Males:

Code
### LINEARITY (linearity and constant variance assumptions)
# Fitted-versus-residuals plot for the male model.
plot(CIS_M, which = 1)     # Linearity ok,  constant variance looks ok(heteroscedasticity)

Code
### HOMOGENEITY OF VARIANCE
# Scale-location plot (also known as the spread-location plot).
plot(CIS_M, which = 3) # Linearity ok,  constant variance looks ok(heteroscedasticity)

Code
# Formal test in the form of Breusch-Pagan (lmtest::bptest).
bptest(CIS_M)# p = 0.019 (output below): we reject the null of homoscedasticity, so the constant variance assumption is violated.

    studentized Breusch-Pagan test

data:  CIS_M
BP = 16.758, df = 7, p-value = 0.01903
Code
### NORMALITY OF RESIDUALS
# QQ plot of residuals.
plot(CIS_M, 2) # suspect

Code
# Formal test in the form of Shapiro-Wilk.
shapiro.test(resid(CIS_M)) # p < 2.2e-16 (output below): reject normality of the residuals

    Shapiro-Wilk normality test

data:  resid(CIS_M)
W = 0.9592, p-value < 2.2e-16
Code
### OUTLIERS
# Plot the 3 largest Cook's distances.
plot(CIS_M, which = 4, id.n = 3)

Code
# Inspect the largest: augment() (broom) attaches fitted values, residuals,
# leverage and Cook's distance per observation.
model.data <- augment(CIS_M) %>% 
  mutate(index = 1:n())
model.data %>% top_n(3, .cooksd) # largest Cook's distances; these observations deserve closer attention
# A tibble: 3 × 13
  .rownames CIS_Totalmean Age[,1] Weight[,1] Height[,1] Intervention_months_fa…¹
  <chr>             <dbl>   <dbl>      <dbl>      <dbl> <fct>                   
1 1006                128    26.0      35.8        3.24 1                       
2 1603                125    12.0       6.81      17.2  3                       
3 5561                 33    29.0      -7.19      -5.76 4                       
# ℹ abbreviated name: ¹​Intervention_months_factor
# ℹ 7 more variables: .fitted <dbl>, .resid <dbl>, .hat <dbl>, .sigma <dbl>,
#   .cooksd <dbl>, .std.resid <dbl>, index <int>
Code
# Plot standardized residuals against observation index, colored by outcome.
ggplot(model.data, aes(index, .std.resid)) + 
  geom_point(aes(color = CIS_Totalmean), alpha = .5) +
  theme_bw()

Code
# Filter potential influential data points (|standardized residual| > 4).
model.data %>% 
  filter(abs(.std.resid) > 4) # some influential data points
# A tibble: 12 × 13
   .rownames CIS_Totalmean Age[,1] Weight[,1] Height[,1] Intervention_months_f…¹
   <chr>             <dbl>   <dbl>      <dbl>      <dbl> <fct>                  
 1 697                129.  12.0       14.8        11.2  0                      
 2 1006               128   26.0       35.8         3.24 1                      
 3 1089               116   22.0       19.8         9.24 1                      
 4 1603               125   12.0        6.81       17.2  3                      
 5 1807               117   16.0       -8.19       -8.76 2                      
 6 2028               114   26.0       -9.19       -8.76 0                      
 7 3288               119   22.0       24.8         6.24 0                      
 8 4216               121   14.0        0.811       5.24 4                      
 9 4969               117    2.98      29.8         1.24 1                      
10 5561                33   29.0       -7.19       -5.76 4                      
11 7370                50    0.985     11.8         8.24 1                      
12 8028               114    0.985     12.8        -6.76 0                      
# ℹ abbreviated name: ¹​Intervention_months_factor
# ℹ 7 more variables: .fitted <dbl>, .resid <dbl>, .hat <dbl>, .sigma <dbl>,
#   .cooksd <dbl>, .std.resid <dbl>, index <int>
Code
# Cook's distance for every observation in the male model.
cooksd = cooks.distance(CIS_M)
# Plot Cook's distance with a horizontal cutoff line.
# NOTE(review): the comment mentioned a (K+1)/n threshold, but the line drawn
# below is (8/n) * 3 — with 8 coefficients this is closer to 3*K/n; confirm
# which rule of thumb was intended.
n <- nrow(data_scale)
plot(cooksd, main = "Cooks Distance for Influential Obs")
abline(h = (8/n) * 3, lty = 2, col = "steelblue") # add cutoff line

Code
# Analysis with log-transformed data: re-fit the fatigue (CIS) model for MEN.
# BUG FIX(review): this chunk previously subset on Gender == "Female" &
# PostMeno == 1 — a copy-paste of the post-menopausal chunk. The printed
# Shapiro-Wilk W below (0.95649) is identical to the post-menopausal result,
# confirming the wrong subset. Corrected to males only, matching CIS_M and
# CIS_M_robust used elsewhere in this document.
CISMfactor_fix <- lm(CIS_Totalmean~ Age + Weight + Height + as.factor(Intervention_months_factor), 
                     data = data_scale2, 
                     subset = data_scale2$Gender == "Male")

# QQ plot of the residuals to re-check the normality assumption after re-fitting.
qqnorm(resid(CISMfactor_fix), col = "grey")
qqline(resid(CISMfactor_fix), col = "dodgerblue", lwd = 2)

Code
# Formal normality test (Shapiro-Wilk) on the re-fitted model's residuals.
# NOTE(review): the output shown below this chunk was produced by the buggy
# (female) subset and should be regenerated.
shapiro.test(resid(CISMfactor_fix))

    Shapiro-Wilk normality test

data:  resid(CISMfactor_fix)
W = 0.95649, p-value < 2.2e-16
Code
## Transforming does not seem to help (residuals still non-normal) and there
## are many outliers. Solution -> robust regression, see chunk below.


# Multicollinearity check on the original male model (CIS_M).
car::vif(CIS_M) # no GVIF value exceeds the usual cutoffs of 5 or 10
                               GVIF Df GVIF^(1/(2*Df))
Age                        1.092206  1        1.045087
Weight                     1.272488  1        1.128046
Height                     1.251062  1        1.118509
Intervention_months_factor 1.007520  4        1.000937

::: {.callout-note appearance="default"}
### Decision

We conclude that the assumptions were violated and thus we use robust models.
:::

Robust models

Code
# Robust (MM-type) regressions of fatigue on age, weight, height and time on
# intervention, stratified by menopausal status and sex.
# NOTE(review): lmrob presumably comes from robustbase, tab_model from sjPlot;
# the library() calls are outside this chunk — verify.
CIS_preF_robust <- lmrob(CIS_Totalmean ~ Age + Weight + Height + as.factor(Intervention_months_factor) , data = data_scale, subset  = data_scale$Gender == "Female" & data_scale$PostMeno == 0)
CIS_postF_robust <- lmrob(CIS_Totalmean ~ Age + Weight + Height + as.factor(Intervention_months_factor) , data = data_scale, subset  = data_scale$Gender == "Female" & data_scale$PostMeno == 1)
CIS_M_robust <- lmrob(CIS_Totalmean ~ Age + Weight + Height + as.factor(Intervention_months_factor) , data = data_scale, subset  = data_scale$Gender == "Male")

# Human-readable labels for the model terms in the summary table; the factor
# levels 1-4 are the 6-month intervention-exposure bins (reference: <6 months).
pl <- c(
  `(Intercept)` = "Intercept",
  `as.factor(Intervention_months_factor)1` = "6-12 months",
  `as.factor(Intervention_months_factor)2` = "12-18 months",
  `as.factor(Intervention_months_factor)3` = "18-24 months",
  `as.factor(Intervention_months_factor)4` = "24+ months"
  
)


# Side-by-side regression table for the three strata.
tab_model(CIS_preF_robust, CIS_postF_robust, CIS_M_robust, pred.labels = pl, dv.labels = c("Pre-menopausal women", "Post-menopausal women", "Men"), title = "Fatigue, higher score is worse (robust regression)", show.reflvl = T, digits = 3)
Fatigue, higher score is worse (robust regression)
  Pre-menopausal women Post-menopausal women Men
Predictors Estimates CI p Estimates CI p Estimates CI p
Intercept 83.728 83.133 – 84.323 <0.001 84.805 84.045 – 85.565 <0.001 85.311 84.982 – 85.641 <0.001
Age 0.087 0.053 – 0.121 <0.001 0.073 0.029 – 0.117 0.001 0.080 0.067 – 0.092 <0.001
Weight 0.005 -0.019 – 0.029 0.701 0.014 -0.011 – 0.039 0.273 0.012 -0.007 – 0.031 0.217
Height -0.022 -0.066 – 0.022 0.321 -0.051 -0.100 – -0.002 0.042 0.001 -0.027 – 0.029 0.945
6-12 months -0.332 -1.051 – 0.387 0.366 -0.290 -1.094 – 0.515 0.480 -0.403 -0.921 – 0.116 0.128
12-18 months 0.707 -0.030 – 1.444 0.060 0.324 -0.447 – 1.094 0.410 -0.502 -1.015 – 0.011 0.055
18-24 months -0.855 -1.751 – 0.040 0.061 -0.158 -1.313 – 0.996 0.788 -0.784 -1.610 – 0.042 0.063
24+ months -0.377 -1.172 – 0.418 0.352 -0.491 -1.536 – 0.554 0.357 -0.271 -0.820 – 0.278 0.334
Observations 2360 1471 3558
R2 / R2 adjusted 0.019 / 0.016 0.015 / 0.010 0.055 / 0.053